EDM.scala 10.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * rico2edm
 * Copyright (C) 2021  UB Basel
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 */


21
package ch.memobase.rico2edm.edm
22

23

24
import ch.memobase.rico2edm.edm
25
import ch.memobase.rico2edm.edm.subjects.{Aggregation, Concept, ModelXMLTransformer, Place, ProvidedCHO, TimeSpan, WebResource}
26

27
28
29
import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
Günter Hipler's avatar
Günter Hipler committed
30
import scala.util.{Success, Try}
31
import ujson.{Arr => JArr, Obj => JObj}
32

33

34
35
36
class EDM {

  // Instance of EDMRdf4jModel held by this class.
  // NOTE(review): no method visible in this file references it - confirm whether it is still needed.
  private val edmRdfModel = new EDMRdf4jModel
37
38
39
40
41
  // Formatter for date/time values. A large collection of DateTimeFormatter examples:
  // https://www.dariawan.com/tutorials/java/java-datetimeformatter-tutorial-examples/
  // TODO: decide which format (ISO_DATE or ISO_INSTANT) is more appropriate for OAI.
  // NOTE(review): no method visible in this file uses this formatter - confirm it is still required.
  //private val dateTimeFormatter = DateTimeFormatter.ISO_DATE
  private val dateTimeFormatter = DateTimeFormatter.ISO_INSTANT
42
43
44
45
46

  /**
   * Converts one incoming message into its EDM representation.
   *
   * The message is parsed with `Extractors.jsonGraph`; the digital object and the record
   * are looked up in the resulting graph. A ProvidedCHO, an optional WebResource and an
   * optional Aggregation are built, their models are merged into the CHO model, and the
   * merged model is handed to `ModelXMLTransformer`.
   *
   * @param messageValue the raw message payload passed to `Extractors.jsonGraph`
   * @return a `Try` holding the short record id together with the transformer output;
   *         any non-fatal exception raised on the way (for instance a `.get` on a
   *         missing graph, digital object, record or id) ends up as a `Failure`
   */
  def create(messageValue: String): Try[ExtractionResult[(String, String)]] = {
    Try {
      val graph = Extractors.jsonGraph(messageValue).get.arr
      val digitalObject = Extractors.digitalObject(graph).get
      val record = Extractors.record(graph)
      // fails here (inside the Try) when the graph contains no record
      val recordObject = record.get
      val shortRecordId = Extractors.recordIdShort(graph)(recordObject).get

      val choExtraction = createChoObject(graph, recordObject, digitalObject)
      val webExtraction = createWebResources(graph, recordObject, digitalObject)
      // actually only one instance - correct?
      val aggregationExtraction = createAggregation(graph, recordObject, digitalObject)
      // Place, Concept and TimeSpan extraction (createPlace / createConcept / createTimeSpan)
      // are currently not part of the generated model.

      val cho = choExtraction.obj

      // merge the statements of the optional web resource into the CHO model
      for (webResource <- webExtraction.obj) {
        cho.getModel.addAll(webResource.getModel)
      }

      // merge the statements of the optional aggregation into the CHO model
      aggregationExtraction.foreach { aggregation =>
        cho.getModel.addAll(aggregation.obj.getModel)
      }

      val recordsetIdents = Extractors.recordSetOrInstitution(record)("isPartOf")
        .map(EDM.getInstitutionOrRecordsetIdent(_))
      val institutionIdents = Extractors.recordSetOrInstitution(record)("heldBy")
        .map(EDM.getInstitutionOrRecordsetIdent(_))

      val esObject = ModelXMLTransformer(
        model = cho.getModel,
        id = shortRecordId,
        recordset = recordsetIdents,
        institution = institutionIdents
      )

      //todo: by now we do not collect any infos and warnings
      ExtractionResult((shortRecordId, esObject), ArrayBuffer.empty[String])
    }
  }

89
  /**
   * Builds the ProvidedCHO for a record by copying the values delivered by the
   * individual `Extractors` functions into the corresponding `add...` methods.
   *
   * @param graph         the JSON graph of the message
   * @param record        the record object inside the graph
   * @param digitalObject the digital object of the record (not used by this method)
   * @return the populated ProvidedCHO wrapped in an [[ExtractionResult]] without warnings
   * @note throws if `Extractors.recordId(record)` or `Extractors.physicalObject(graph)`
   *       yields no value, because `.get` is called on both results
   */
  def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {

    val cho = new ProvidedCHO(Extractors.recordId(record).get)
    val recordFields = record.value

    // Titles: main titles found in blank nodes win; only when there are none
    // do we fall back to the title property of the record itself.
    val blankNodeTitles = Extractors.resourceTitles(graph)(recordFields)
    if (blankNodeTitles.isEmpty) {
      for (title <- Extractors.title(record)) cho.addTitel(Some(title))
    } else {
      for (title <- blankNodeTitles) cho.addTitel(Some(title))
    }

    cho.addDescription(Extractors.dctAbstract(record))
    cho.addDescription(Extractors.scopeAndContent(record))
    cho.addDescription(Extractors.descriptiveNote(record))
    cho.addCreationDate(Extractors.creationDate(graph)(record.obj))

    //todo (translated from German): only the name c._1 - most likely to be dropped!!!
    //todo: q.f.S. creator and contributor together with role??
    for (creator <- Extractors.resourceSourceOfRelationCreation(graph)(recordFields)("creator")) {
      cho.addCreator(Some(s"${creator._1} / ${creator._2}"))
    }

    // contributors come from two different sources
    for (contributor <- Extractors.resourceSourceOfRelationCreation(graph)(recordFields)("contributor")) {
      cho.addContributor(Some(s"${contributor._1} / ${contributor._2}"))
    }
    for (contributor <- Extractors.resourceContributorRDauP60441(graph)(recordFields)) {
      cho.addContributor(Some(contributor))
    }

    for (identifier <- Extractors.identifiedBy(graph)(recordFields)) {
      cho.addIdentifier(Some(identifier))
    }

    for (language <- Extractors.resourceAllLanguages(graph)(recordFields)) {
      cho.addLanguage(Some(language))
    }

    for (publisher <- Extractors.publishedByGH(graph)(recordFields)) {
      cho.addPublisher(Some(publisher))
    }

    for (rights <- Extractors.regulatedMultipleByHolder(graph)(recordFields)) {
      cho.addRights(Some(rights))
    }

    for (source <- Extractors.recordSource(recordFields)) {
      cho.addSource(Some(source))
    }

    // subjects and genres are both stored as subjects
    for (subject <- Extractors.resourceSubjects(graph)(recordFields)) {
      cho.addSubject(Some(subject))
    }
    for (genre <- Extractors.resourceGenres(graph)(recordFields)) {
      cho.addSubject(Some(genre))
    }

    for (created <- Extractors.dcTermsCreated(graph)(recordFields)) {
      cho.addDcTermsCreated(Some(created))
    }

    // the extent is read from the physical object; a graph without one makes this throw
    for (extent <- Extractors.resourceDCTermsExtent(Extractors.physicalObject(graph).get)) {
      cho.addDcTermsExtent(Some(extent))
    }

    for (issued <- Extractors.dcTermsIssuedTemporal(graph)(recordFields)("issued")) {
      cho.addDcTermsIssued(Some(issued))
    }

    for (medium <- Extractors.resourceDCTermsMedium(graph)) {
      cho.addDcTermsMedium(Some(medium))
    }

    // spatial values come from two extractors and end up in the same property
    for (spatial <- Extractors.dcTermsSpatial(graph)(recordFields)) {
      cho.addDcTermsSpatial(Some(spatial))
    }
    for (spatial <- Extractors.dcTermsSpatialCapture(graph)(recordFields)) {
      cho.addDcTermsSpatial(Some(spatial))
    }

    for (temporal <- Extractors.dcTermsIssuedTemporal(graph)(recordFields)("temporal")) {
      cho.addDcTermsTemporal(Some(temporal))
    }

    for (edmType <- Extractors.edmType(recordFields)) {
      cho.addEdmType(Some(edmType))
    }

    ExtractionResult(cho)
  }

186
  /**
   * Builds the WebResource belonging to the digital object, if an id for it can be
   * determined via `Extractors.webResourceID`.
   *
   * Only a single web resource is produced because only one digital object is passed in
   * (open question from the original author: how many digital objects can a document have?).
   *
   * @param graph         the JSON graph of the message
   * @param record        the record object inside the graph (not used by this method)
   * @param digitalObject the digital object the web resource is derived from
   * @return the optional WebResource, enriched with rights and format when present
   */
  def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Option[WebResource]] = {

    val resource: Option[WebResource] =
      Extractors.webResourceID(graph)(digitalObject).map(WebResource(_))

    // rights and mime type are only looked up when a web resource could be created
    resource.foreach { webResource =>
      for (regulation <- Extractors.usageRegulationDigitalObject(graph)(digitalObject.value)) {
        webResource.addRights(Option(regulation))
      }
      for (format <- Extractors.mimeType(digitalObject)) {
        webResource.addDcFormat(Option(format))
      }
    }

    ExtractionResult(resource)
  }

205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
  /**
   * Builds a Place identified by the record id and labels it with every value
   * returned by `Extractors.dcTermsSpatial`.
   *
   * Open question from the original author: how many place instances are available?
   *
   * @param graph         the JSON graph of the message
   * @param record        the record object inside the graph
   * @param digitalObject the digital object of the record (not used by this method)
   * @return the Place wrapped in an [[ExtractionResult]] without warnings
   * @note throws if `Extractors.recordId(record)` yields no value (`.get`)
   */
  def createPlace(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Place] = {
    val placeId = Extractors.recordId(record).get
    val place = new Place(placeId)

    for (label <- Extractors.dcTermsSpatial(graph)(record.value)) {
      place.addSkosPrefLabel(Some(label))
    }

    ExtractionResult(place)
  }


  /**
   * Builds a Concept identified by the record id and gives it a fixed test label.
   *
   * Open questions from the original author: how many instances are available,
   * and is the record id the correct identifier?
   *
   * @param graph         the JSON graph of the message (not used by this method)
   * @param record        the record object whose id identifies the concept
   * @param digitalObject the digital object of the record (not used by this method)
   * @return the Concept wrapped in an [[ExtractionResult]] without warnings
   * @note throws if `Extractors.recordId(record)` yields no value (`.get`)
   */
  def createConcept(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Concept] = {
    val conceptId = Extractors.recordId(record).get
    val concept = new Concept(conceptId)
    // placeholder label only
    concept.addSkosPrefLabel(Some("only test for concept"))
    ExtractionResult(concept)
  }


231
  /**
   * Builds the Aggregation for a record, provided a short record id can be determined.
   *
   * The short id is used for the aggregation itself (via the `Aggregation` apply method,
   * because it is not yet settled how aggregation identifiers are built) as well as for
   * `isShownAt` and `aggregatedCHO`. The digital-object part (`isShownBy`, EDM object,
   * rights) is only added when the lookup of the short digital object id does not throw.
   *
   * @param graph         the JSON graph of the message
   * @param record        the record object inside the graph
   * @param digitalObject the digital object of the record
   * @return `Some` aggregation result, or `None` when no short record id is available
   */
  def createAggregation(graph: JArr, record: JObj, digitalObject: JObj): Option[ExtractionResult[Aggregation]] = {

    val shortId = Extractors.recordIdShort(graph)(record)

    //todo (Aggregation_1): what happens if we cannot build an Aggregation? - exception and
    // the rico record is not delivered to Europeana??
    shortId.map { id =>
      val aggregation = Aggregation(id)
      aggregation.addIsShownAt(shortId)
      aggregation.addAggregatedCHO(shortId)

      //todo: the implementation as a whole is probably not consistent yet.
      // Do we always have a digital object (do we keep the filter "hasNoDigitalObject" -
      // export of pure metadata via OAI as well)? If not (hence the Try here), this could
      // be implemented in a more consistent way.
      Try(Extractors.digitalObjectIdShort(graph)(digitalObject)).foreach { shownBy =>
        aggregation.addIsShownBy(shownBy)

        // photos and all other types are registered through different methods
        Extractors.ricoType(record).foreach { ricoType =>
          if (ricoType == "Foto") {
            aggregation.addEDMObjectFoto(Option(ricoType))
          } else {
            aggregation.addEDMObjectNoFoto(Option(ricoType))
          }
        }

        // rights are only set when exactly one usage regulation is found
        Extractors.usageRegulationDigitalObject(graph)(digitalObject.value) match {
          case single :: Nil => aggregation.addRights(Option(single))
          case _ =>
        }
      }

      ExtractionResult(aggregation)
    }
  }


  /**
   * Builds a TimeSpan identified by the record id and gives it a fixed test label.
   *
   * Open questions from the original author: how many instances are available,
   * and is the record id the correct identifier?
   *
   * @param graph         the JSON graph of the message (not used by this method)
   * @param record        the record object whose id identifies the time span
   * @param digitalObject the digital object of the record (not used by this method)
   * @return the TimeSpan wrapped in an [[ExtractionResult]] without warnings
   * @note throws if `Extractors.recordId(record)` yields no value (`.get`)
   */
  def createTimeSpan(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[TimeSpan] = {
    val timeSpanId = Extractors.recordId(record).get
    val timeSpan = new TimeSpan(timeSpanId)
    // placeholder label only
    timeSpan.addSkosPrefLabel(Some("only test for timespan"))
    ExtractionResult(timeSpan)
  }


295

296
297
298
  /*
  todo: my assumption: recordset and institution have cardinality n
   */
299
  //def extractRecordset(graph: JArr, record: JObj): List[String] = ???
300

301
  //def extractInstitution(graph: JArr, record: JObj): List[String] = ???
302
303
304
305
306
307
308
309


}

object EDM {

  // Pattern matching strings that start with "http"; not referenced inside this object.
  private val isHttpIdentifier = "^http.*".r

  /**
   * Returns the part of `ident` that follows its last `/`.
   *
   * An identifier without any `/` is returned unchanged; an identifier ending in `/`
   * yields the empty string.
   *
   * @param ident the identifier (typically a URI) of an institution or record set
   * @return the trailing path segment of `ident`
   */
  def getInstitutionOrRecordsetIdent(ident: String): String = {
    // lastIndexOf gives -1 when there is no slash, so the substring then starts at 0
    val lastSlash = ident.lastIndexOf('/')
    ident.substring(lastSlash + 1)
  }
}

/**
 * Result of an extraction step.
 *
 * @param obj      the extracted value
 * @param warnings messages collected during extraction; a new empty buffer by default
 * @tparam T type of the extracted value
 */
case class ExtractionResult[T](
  obj: T,
  warnings: mutable.Buffer[String] = mutable.Buffer.empty[String]
)
313
314
315