/*
 * rico2edm
 * Copyright (C) 2021  UB Basel
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 */


21
package ch.memobase.rico2edm.edm
22

23

24
import ch.memobase.rico2edm.edm
25
import ch.memobase.rico2edm.edm.subjects.{Aggregation, Concept, ModelXMLTransformer, Place, ProvidedCHO, TimeSpan, WebResource}
26
import ch.memobase.rico2edm.utils.{ElasticSearchClientWrapper, Helper}
27

28
29
30
import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
Günter Hipler's avatar
Günter Hipler committed
31
import scala.util.{Success, Try}
32
import ujson.{Arr => JArr, Obj => JObj}
33

34

35
36
class EDM {

37
  //private val edmRdfModel = new EDMRdf4jModel
38
39
40
41
42
  // Date/time formatter; which variant is more appropriate for OAI is still to be decided.
  // ISO_INSTANT is the current choice, ISO_DATE the alternative that was considered.
  // Many DateTimeFormatter examples:
  // https://www.dariawan.com/tutorials/java/java-datetimeformatter-tutorial-examples/
  private val dateTimeFormatter: DateTimeFormatter = DateTimeFormatter.ISO_INSTANT
43
44
45
46
47

  /** Builds the EDM representation for one incoming message.
    *
    * The message is read as a JSON graph from which the digital object and the record are
    * extracted. A ProvidedCHO, an optional WebResource and an optional Aggregation are created
    * from them; the models of the latter two are added to the model of the ProvidedCHO, which is
    * then handed to `ModelXMLTransformer` together with the short record id and the
    * record set / institution identifiers.
    *
    * @param messageValue the raw message content
    * @return a `Success` holding the pair (short record id, transformer output), or a `Failure`
    *         when any extraction step or any of the `.get` calls throws
    */
  def create(messageValue: String): Try[ExtractionResult[(String, String)]] = Try {
    val graph = Extractors.jsonGraph(messageValue).get.arr
    val digitalObject = Extractors.digitalObject(graph).get
    val recordNode = Extractors.record(graph).get
    val shortRecordId = Extractors.recordIdShort(graph)(recordNode).get

    val choExtraction = createChoObject(graph, recordNode, digitalObject)
    val webExtraction = createWebResources(graph, recordNode, digitalObject)
    // Place, Concept and TimeSpan extraction (createPlace / createConcept / createTimeSpan)
    // are currently not part of the result.
    val aggregationExtraction = createAggregation(graph, recordNode, digitalObject)

    val cho = choExtraction.obj

    // merge the statements of the related resources into the model of the CHO
    webExtraction.obj.foreach { webResource =>
      cho.getModel.addAll(webResource.getModel)
    }
    aggregationExtraction.foreach { aggregation =>
      cho.getModel.addAll(aggregation.obj.getModel)
    }

    val esObject = ModelXMLTransformer(
      model = cho.getModel,
      id = shortRecordId,
      recordset = Extractors
        .recordSetOrInstitution(recordNode)("isPartOf")
        .map(recordSetIdent => EDM.getInstitutionOrRecordsetIdent(recordSetIdent)),
      institution = Extractors
        .recordSetOrInstitution(recordNode)("heldBy")
        .map(institutionIdent => EDM.getInstitutionOrRecordsetIdent(institutionIdent))
    )

    //todo: by now we do not collect any infos and warnings
    ExtractionResult((shortRecordId, esObject), ArrayBuffer.empty[String])
  }

90
  /** Creates the ProvidedCHO for a record and fills it with the values delivered by the
    * individual extractors.
    *
    * @param graph         the complete JSON graph of the message
    * @param record        the record node of the graph
    * @param digitalObject the digital object node (not used by this method)
    * @return the populated ProvidedCHO wrapped in an [[ExtractionResult]]
    */
  def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {

    val cho = new ProvidedCHO(Extractors.recordId(record).get)
    val props = record.value

    // Titles: the main title is looked up in the blank nodes first; only when nothing is
    // found there, the title property of the record itself is used.
    val blankNodeTitles = Extractors.resourceTitles(graph)(props)
    if (blankNodeTitles.isEmpty) {
      Extractors.title(record).foreach(title => cho.addTitel(Some(title)))
    } else {
      blankNodeTitles.foreach(title => cho.addTitel(Some(title)))
    }

    cho.addDescription(Extractors.dctAbstract(record))
    cho.addDescription(Extractors.scopeAndContent(record))
    cho.addDescription(Extractors.descriptiveNote(record))
    // creation date (Extractors.creationDate) is currently not added

    //todo: only the name (c._1) - will most likely be dropped
    //todo: open question: creator and contributor together with role?
    Extractors.resourceSourceOfRelationCreation(graph)(props)("creator")
      .foreach(creator => cho.addCreator(Some(creator)))

    // contributors are collected from two different sources
    Extractors.resourceSourceOfRelationCreation(graph)(props)("contributor")
      .foreach(contributor => cho.addContributor(Some(contributor)))
    Extractors.resourceContributorRDauP60441(graph)(props)
      .foreach(contributor => cho.addContributor(Some(contributor)))

    Extractors.identifiedBy(graph)(props)
      .foreach(identifier => cho.addIdentifier(Some(identifier)))

    Extractors.resourceAllLanguages(graph)(props)
      .foreach(language => cho.addLanguage(Helper.getLanguageCode(language)))

    Extractors.publishedByGH(graph)(props)
      .foreach(publisher => cho.addPublisher(Some(publisher)))

    Extractors.regulatedMultipleByHolder(graph)(props)
      .foreach(rights => cho.addRights(Some(rights)))

    Extractors.recordSource(props)
      .foreach(source => cho.addSource(Some(source)))

    Extractors.resourceSubjects(graph)(props)
      .foreach(subject => cho.addSubject(Some(subject)))

    Extractors.resourceGenres(graph)(props)
      .foreach(genre => cho.addGenre(Some(genre)))

    Extractors.dcTermsCreated(graph)(props)
      .foreach(created => cho.addDcTermsCreated(Some(created)))

    // NOTE(review): `.get` throws when the graph has no physical object - confirm that
    // failing the whole record is intended in that case.
    val physicalObject = Extractors.physicalObject(graph).get
    Extractors.resourceDCTermsExtent(physicalObject)
      .foreach(extent => cho.addDcTermsExtent(Some(extent)))

    Extractors.dcTermsIssuedTemporal(graph)(props)("issued")
      .foreach(issued => cho.addDcTermsIssued(Some(issued)))

    Extractors.resourceDCTermsMediumWikidata(graph)
      .foreach(medium => cho.addDcTermsMedium(Some(medium)))

    // both spatial extractors feed the same dcterms:spatial field
    Extractors.dcTermsSpatial(graph)(props)
      .foreach(spatial => cho.addDcTermsSpatial(Some(spatial)))
    Extractors.dcTermsSpatialCapture(graph)(props)
      .foreach(spatial => cho.addDcTermsSpatial(Some(spatial)))

    Extractors.dcTermsIssuedTemporal(graph)(props)("temporal")
      .foreach(temporal => cho.addDcTermsTemporal(Some(temporal)))

    Extractors.edmType(props)
      .foreach(edmType => cho.addEdmType(Some(edmType)))

    // current location: looked up via the identifier of the holding institution
    Extractors.recordSetOrInstitution(props)("heldBy").foreach { holder =>
      val institutionId = EDM.getInstitutionOrRecordsetIdent(holder)
      Helper.getInstitutionCoord(institutionId)
        .map(coordinates => cho.addCurrentLocation(Some(coordinates)))
    }

    // isPartOf: record set name looked up via the identifier of the record set
    Extractors.recordSetOrInstitution(props)("isPartOf").foreach { parent =>
      val recordSetId = EDM.getInstitutionOrRecordsetIdent(parent)
      ElasticSearchClientWrapper.getRecordsetName(recordSetId)
        .map(recordSetName => cho.addIsPartOf(Some(recordSetName)))
    }

    ExtractionResult(cho)
  }

208
  /** Creates the WebResource belonging to the digital object, if an id for it can be extracted.
    *
    * Open question from the original author: how many digital objects can be part of a
    * document? Currently a single web resource is assumed because there is only one
    * digital object.
    *
    * @param graph         the complete JSON graph of the message
    * @param record        the record node of the graph (not used by this method)
    * @param digitalObject the digital object node
    * @return the optional WebResource, with rights and format added when it exists
    */
  def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Option[WebResource]] = {

    val maybeResource: Option[WebResource] =
      Extractors.webResourceID(graph)(digitalObject).map(WebResource(_))

    // rights and format are only added when a web resource could be created
    maybeResource.foreach { resource =>
      Extractors.usageRegulationDigitalObject(graph)(digitalObject.value)
        .foreach(regulation => resource.addRights(Option(regulation)))

      Extractors.mimeType(digitalObject)
        .foreach(mimeType => resource.addDcFormat(Option(mimeType)))
    }

    ExtractionResult(maybeResource)
  }

227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
  /** Creates a Place identified by the record id and adds every value of the
    * dcterms:spatial extractor as a preferred label.
    *
    * Open question from the original author: how many place instances are available?
    *
    * @param graph         the complete JSON graph of the message
    * @param record        the record node of the graph
    * @param digitalObject the digital object node (not used by this method)
    * @return the Place wrapped in an [[ExtractionResult]]
    */
  def createPlace(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Place] = {
    val placeId = Extractors.recordId(record).get
    val place = new Place(placeId)

    Extractors.dcTermsSpatial(graph)(record.value).foreach { label =>
      place.addSkosPrefLabel(Some(label))
    }

    ExtractionResult(place)
  }


  /** Creates a Concept identified by the record id, carrying a fixed test label.
    *
    * Open questions from the original author: how many concept instances are available, and
    * is it correct to use the record id as identifier?
    *
    * @param graph         the complete JSON graph of the message (not used by this method)
    * @param record        the record node of the graph
    * @param digitalObject the digital object node (not used by this method)
    * @return the Concept wrapped in an [[ExtractionResult]]
    */
  def createConcept(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Concept] = {
    val conceptId = Extractors.recordId(record).get
    val concept = new Concept(conceptId)
    // placeholder label only
    concept.addSkosPrefLabel(Some("only test for concept"))
    ExtractionResult(concept)
  }


253
  /** Creates the Aggregation for a record, provided a short record id can be extracted.
    *
    * The Aggregation is built via its `apply` method because it is not yet known how the
    * identifier for Aggregations has to be constructed.
    *
    * @param graph         the complete JSON graph of the message
    * @param record        the record node of the graph
    * @param digitalObject the digital object node
    * @return `Some` with the populated Aggregation, or `None` when no short record id is found
    */
  def createAggregation(graph: JArr, record: JObj, digitalObject: JObj): Option[ExtractionResult[Aggregation]] = {

    val recordIdShort = Extractors.recordIdShort(graph)(record)

    //todo (Aggregation_1): what happens when no Aggregation can be built (None below)? -
    // exception, and the rico record is not delivered to Europeana??
    recordIdShort.map { shortId =>
      val aggregation = Aggregation(shortId)

      aggregation.addIsShownAt(recordIdShort)
      aggregation.addAggregatedCHO(recordIdShort)

      //todo: the implementation as a whole is probably not consistent yet:
      // do we always have a digital object (do we keep the filter "hasNoDigitalObject" -
      // export of pure metadata via OAI as well)? If not (hence the Try here), this could
      // be implemented in a more consistent way.
      Try(Extractors.digitalObjectIdShort(graph)(digitalObject)).foreach { digiObjIdShort =>
        aggregation.addIsShownBy(digiObjIdShort)

        // the edm:object value depends on whether the record is of rico type "Foto"
        Extractors.ricoType(record).foreach { ricoType =>
          if (ricoType == "Foto") aggregation.addEDMObjectFoto(digiObjIdShort)
          else aggregation.addEDMObjectNoFoto(digiObjIdShort)
        }

        // rights are only taken over when exactly one usage regulation is found
        Extractors.usageRegulationDigitalObject(graph)(digitalObject.value) match {
          case regulation :: Nil => aggregation.addRights(Option(regulation))
          case _ =>
        }
      }

      //fixed value for provider
      aggregation.addProvider(Some("Memoriav"))

      // data provider: looked up via the identifier of the holding institution
      Extractors.recordSetOrInstitution(record.value)("heldBy").foreach { holder =>
        val institutionId = EDM.getInstitutionOrRecordsetIdent(holder)
        ElasticSearchClientWrapper.getHeldBy(institutionId)
          .map(institutionName => aggregation.addDataProvider(Some(institutionName)))
      }

      ExtractionResult(aggregation)
    }
  }


  /** Creates a TimeSpan identified by the record id, carrying a fixed test label.
    *
    * Open questions from the original author: how many time span instances are available, and
    * is it correct to use the record id as identifier?
    *
    * @param graph         the complete JSON graph of the message (not used by this method)
    * @param record        the record node of the graph
    * @param digitalObject the digital object node (not used by this method)
    * @return the TimeSpan wrapped in an [[ExtractionResult]]
    */
  def createTimeSpan(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[TimeSpan] = {
    val timeSpanId = Extractors.recordId(record).get
    val timespan = new TimeSpan(timeSpanId)
    // placeholder label only
    timespan.addSkosPrefLabel(Some("only test for timespan"))
    ExtractionResult(timespan)
  }


328

329
330
331
  /*
  todo: my assumption: recordset and institution have cardinality n
   */
  //def extractRecordset(graph: JArr, record: JObj): List[String] = ???

  //def extractInstitution(graph: JArr, record: JObj): List[String] = ???
335
336
337
338
339
340
341
342


}

object EDM {

  private val isHttpIdentifier = "^http.*".r

  /** Returns the last path segment of an institution or record set identifier.
    *
    * Everything up to and including the last `/` is dropped; an identifier without any `/`
    * is returned unchanged. Trailing slashes are ignored, so `".../abc/"` yields `"abc"`
    * rather than an empty string.
    *
    * @param ident the full identifier, e.g. a URI ending in the short id
    * @return the short identifier (empty only if `ident` is empty or consists of slashes)
    */
  def getInstitutionOrRecordsetIdent(ident: String): String = {
    // cut off trailing slashes first, otherwise the "last segment" would be empty
    val trimmed = ident.take(ident.lastIndexWhere(_ != '/') + 1)
    // lastIndexOf yields -1 when there is no slash, so the whole string is kept
    trimmed.substring(trimmed.lastIndexOf('/') + 1)
  }
}

/** Result of an extraction step.
  *
  * @param obj      the extracted object
  * @param warnings messages collected during the extraction; a fresh empty buffer by default
  * @tparam T type of the extracted object
  */
case class ExtractionResult[T](
  obj: T,
  warnings: mutable.Buffer[String] = mutable.Buffer.empty[String]
)
346
347
348