EDM.scala 9.75 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * rico2edm
 * Copyright (C) 2021  UB Basel
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 */


21
package ch.memobase.rico2edm.edm
22

23

24
import ch.memobase.rico2edm.edm
25
import ch.memobase.rico2edm.edm.subjects.{Aggregation, Concept, ModelXMLTransformer, Place, ProvidedCHO, TimeSpan, WebResource}
26

27
28
29
30
import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Try
31
import ujson.{Arr => JArr, Obj => JObj}
32

33

34
35
36
class EDM {

  // Model helper instantiated together with this EDM instance. None of the
  // methods visible in this file reference it.
  private val edmRdfModel: EDMRdf4jModel = new EDMRdf4jModel

  // Date formatter; not referenced by any method visible in this file.
  // TODO: decide which formatter is more appropriate for OAI. ISO_INSTANT is the
  // current choice, ISO_DATE was the alternative that had been considered.
  // Examples: https://www.dariawan.com/tutorials/java/java-datetimeformatter-tutorial-examples/
  private val dateTimeFormatter: DateTimeFormatter = DateTimeFormatter.ISO_INSTANT
42
43
44
45
46

  /**
   * Builds the EDM output for one incoming message.
   *
   * The message is parsed into a JSON graph from which the digital object and
   * the record are extracted. The models of the optional web resource and of
   * the optional aggregation are added to the model of the provided CHO, and
   * that model is handed to `ModelXMLTransformer`.
   *
   * @param messageValue raw message value passed to `Extractors.jsonGraph`
   * @return `Success` with the short record id paired with the transformer
   *         output (the warnings buffer is currently always empty), or
   *         `Failure` when a mandatory element is missing or any step throws
   */
  def create(messageValue: String): Try[ExtractionResult[(String, String)]] = {
    Try {
      // Mandatory parts: a missing one throws here and surfaces as a Failure.
      val graph = Extractors.jsonGraph(messageValue).get.arr
      val digitalObject = Extractors.digitalObject(graph).get
      val record = Extractors.record(graph)
      // Unwrap the record once instead of calling `.get` at every use site.
      val recordObject = record.get

      // Same exception type as a bare `.get`, but with a message that tells
      // the operator which element was missing.
      val shortRecordId = Extractors.recordIdShort(graph)(recordObject).getOrElse(
        throw new NoSuchElementException("no short record id could be extracted from the record")
      )

      val choExtraction = createChoObject(graph, recordObject, digitalObject)
      val webExtraction = createWebResources(graph, recordObject, digitalObject)
      // Place, Concept and TimeSpan are not part of the output yet; see
      // createPlace, createConcept and createTimeSpan.
      val aggregationExtraction = createAggregation(graph, recordObject, digitalObject)

      val cho = choExtraction.obj

      // Both results are optional and only used for their side effect on the
      // CHO model, hence `foreach` rather than `map`.
      webExtraction.obj.foreach { webResource =>
        cho.getModel.addAll(webResource.getModel)
      }
      aggregationExtraction.foreach { aggregation =>
        cho.getModel.addAll(aggregation.obj.getModel)
      }

      val esObject = ModelXMLTransformer(
        model = cho.getModel,
        id = shortRecordId,
        recordset = Extractors.recordSetOrInstitution(record)("isPartOf")
          .map(ident => EDM.getInstitutionOrRecordsetIdent(ident)),
        institution = Extractors.recordSetOrInstitution(record)("heldBy")
          .map(identInstitution => EDM.getInstitutionOrRecordsetIdent(identInstitution))
      )

      // TODO: infos and warnings are not collected yet
      ExtractionResult((shortRecordId, esObject), new ArrayBuffer[String]())
    }
  }

89
  /**
   * Assembles the `ProvidedCHO` for a record by running the individual
   * extractors and feeding their results into the matching `add*` methods.
   *
   * Fails (via `.get`) when no record id or no physical object can be
   * extracted.
   *
   * @param graph         the JSON graph the record belongs to
   * @param record        the record node
   * @param digitalObject the digital object node (not used by this method)
   * @return the populated CHO wrapped in an `ExtractionResult`
   */
  def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
    val cho = new ProvidedCHO(Extractors.recordId(record).get)
    val recordFields = record.value

    // Titles: main titles found in blank nodes win; the record's own title
    // property is only consulted when the blank nodes yield nothing.
    val blankNodeTitles = Extractors.resourceTitles(graph)(recordFields)
    if (blankNodeTitles.isEmpty) {
      for (title <- Extractors.title(record)) cho.addTitel(Some(title))
    } else {
      for (title <- blankNodeTitles) cho.addTitel(Some(title))
    }

    cho.addDescription(Extractors.dctAbstract(record))
    cho.addDescription(Extractors.scopeAndContent(record))
    cho.addDescription(Extractors.descriptiveNote(record))
    cho.addCreationDate(Extractors.creationDate(graph)(record.obj))

    // TODO: open question - should creator and contributor be delivered together with their role?
    for (creator <- Extractors.resourceSourceOfRelationCreation(graph)(recordFields)("creator")) {
      cho.addCreator(Some(s"${creator._1} / ${creator._2}"))
    }

    // Contributors are collected from two different sources.
    for (contributor <- Extractors.resourceSourceOfRelationCreation(graph)(recordFields)("contributor")) {
      cho.addContributor(Some(s"${contributor._1} / ${contributor._2}"))
    }
    for (contributor <- Extractors.resourceContributorRDauP60441(graph)(recordFields)) {
      cho.addContributor(Some(contributor))
    }

    for (identifier <- Extractors.identifiedBy(graph)(recordFields)) {
      cho.addIdentifier(Some(identifier))
    }

    for (language <- Extractors.resourceAllLanguages(graph)(recordFields)) {
      cho.addLanguage(Some(language))
    }

    for (publisher <- Extractors.publishedByGH(graph)(recordFields)) {
      cho.addPublisher(Some(publisher))
    }

    for (rights <- Extractors.regulatedMultipleByHolder(graph)(recordFields)) {
      cho.addRights(Some(rights))
    }

    for (source <- Extractors.recordSource(recordFields)) {
      cho.addSource(Some(source))
    }

    // Subjects and genres both end up as subjects of the CHO.
    for (subject <- Extractors.resourceSubjects(graph)(recordFields)) {
      cho.addSubject(Some(subject))
    }
    for (genre <- Extractors.resourceGenres(graph)(recordFields)) {
      cho.addSubject(Some(genre))
    }

    for (created <- Extractors.dcTermsCreated(graph)(recordFields)) {
      cho.addDcTermsCreated(Some(created))
    }

    // The extent is read from the physical object, which must be present.
    val physicalObject = Extractors.physicalObject(graph).get
    for (extent <- Extractors.resourceDCTermsExtent(physicalObject)) {
      cho.addDcTermsExtent(Some(extent))
    }

    for (issued <- Extractors.dcTermsIssuedTemporal(graph)(recordFields)("issued")) {
      cho.addDcTermsIssued(Some(issued))
    }

    for (medium <- Extractors.resourceDCTermsMedium(graph)) {
      cho.addDcTermsMedium(Some(medium))
    }

    for (spatial <- Extractors.dcTermsSpatial(graph)(recordFields)) {
      cho.addDcTermsSpatial(Some(spatial))
    }

    for (temporal <- Extractors.dcTermsIssuedTemporal(graph)(recordFields)("temporal")) {
      cho.addDcTermsTemporal(Some(temporal))
    }

    for (edmType <- Extractors.edmType(recordFields)) {
      cho.addEdmType(Some(edmType))
    }

    ExtractionResult(cho)
  }

181
  /**
   * Creates the optional `WebResource` for the given digital object.
   *
   * A web resource is only built when `Extractors.webResourceID` yields an id;
   * in that case the usage regulations and the MIME type of the digital object
   * are added to it as rights and format.
   *
   * @param graph         the JSON graph the digital object belongs to
   * @param record        the record node (not used by this method)
   * @param digitalObject the digital object node
   * @return the web resource, or `None` when no id could be extracted, wrapped
   *         in an `ExtractionResult`
   */
  def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Option[WebResource]] = {
    // TODO: clarify how many digital objects can be part of a document. For now
    // a single web resource is assumed because only one digital object is given.
    val webResource: Option[WebResource] =
      Extractors.webResourceID(graph)(digitalObject).map(id => WebResource(id))

    // `foreach` replaces the former `isDefined` check with repeated `.get`;
    // nothing is extracted when there is no web resource.
    webResource.foreach { resource =>
      Extractors.usageRegulationDigitalObject(graph)(digitalObject.value)
        .foreach(rights => resource.addRights(Option(rights)))

      Extractors.mimeType(digitalObject)
        .foreach(format => resource.addDcFormat(Option(format)))
    }

    ExtractionResult(webResource)
  }

200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
  /**
   * Creates a `Place` identified by the record id and labels it with every
   * value returned by `Extractors.dcTermsSpatial`.
   *
   * Fails (via `.get`) when no record id can be extracted.
   *
   * @param graph         the JSON graph the record belongs to
   * @param record        the record node
   * @param digitalObject the digital object node (not used by this method)
   * @return the place wrapped in an `ExtractionResult`
   */
  def createPlace(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Place] = {
    // TODO: open question - how many Place instances can exist per record?
    val recordId = Extractors.recordId(record).get
    val place = new Place(recordId)

    for (label <- Extractors.dcTermsSpatial(graph)(record.value)) {
      place.addSkosPrefLabel(Some(label))
    }

    ExtractionResult(place)
  }


  /**
   * Creates a placeholder `Concept` identified by the record id; its only
   * label is a fixed test string.
   *
   * Fails (via `.get`) when no record id can be extracted.
   *
   * @param graph         the JSON graph (not used by this method)
   * @param record        the record node
   * @param digitalObject the digital object node (not used by this method)
   * @return the concept wrapped in an `ExtractionResult`
   */
  def createConcept(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Concept] = {
    // TODO: open questions - how many Concept instances can exist per record,
    // and is the record id the right identifier for a concept?
    val recordId = Extractors.recordId(record).get
    val concept = new Concept(recordId)

    // Placeholder label until real concept data is extracted.
    val placeholderLabel = Some("only test for concept")
    concept.addSkosPrefLabel(placeholderLabel)

    ExtractionResult(concept)
  }


226
  /**
   * Creates the `Aggregation` for a record, if a short record id is available.
   *
   * The short record id is used as the identifier of the aggregation and is
   * also passed to `addIsShownAt` and `addAggregatedCHO`.
   *
   * @param graph         the JSON graph the record belongs to
   * @param record        the record node
   * @param digitalObject the digital object node (not used by this method)
   * @return `Some` aggregation result, or `None` when no short record id could
   *         be extracted
   */
  def createAggregation(graph: JArr, record: JObj, digitalObject: JObj): Option[ExtractionResult[Aggregation]] = {
    // The factory method is used because it is not yet settled how the
    // identifier of an Aggregation has to be built.
    // TODO: open question - is the record id the right basis for that identifier?
    val recordIdShort = Extractors.recordIdShort(graph)(record)

    // TODO (Aggregation_1): what should happen when no Aggregation can be
    // built? Throw an exception so the RiCo record is not delivered to
    // Europeana? For now the absence is reported as None.
    recordIdShort.map { shortId =>
      val aggregation = Aggregation(shortId)
      aggregation.addIsShownAt(recordIdShort)
      aggregation.addAggregatedCHO(recordIdShort)
      ExtractionResult(aggregation)
    }
  }


  /**
   * Creates a placeholder `TimeSpan` identified by the record id; its only
   * label is a fixed test string.
   *
   * Fails (via `.get`) when no record id can be extracted.
   *
   * @param graph         the JSON graph (not used by this method)
   * @param record        the record node
   * @param digitalObject the digital object node (not used by this method)
   * @return the time span wrapped in an `ExtractionResult`
   */
  def createTimeSpan(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[TimeSpan] = {
    // TODO: open questions - how many TimeSpan instances can exist per record,
    // and is the record id the right identifier for a time span?
    val recordId = Extractors.recordId(record).get
    val timespan = new TimeSpan(recordId)

    // Placeholder label until real time span data is extracted.
    val placeholderLabel = Some("only test for timespan")
    timespan.addSkosPrefLabel(placeholderLabel)

    ExtractionResult(timespan)
  }


265

266
267
268
  /*
  todo: my assumption: recordset and institution have cardinality n
   */
269
  //def extractRecordset(graph: JArr, record: JObj): List[String] = ???
270

271
  //def extractInstitution(graph: JArr, record: JObj): List[String] = ???
272
273
274
275
276
277
278
279


}

/** Companion helpers for [[EDM]]. */
object EDM {

  // Pattern for identifiers starting with "http"; not referenced by any code
  // visible in this file.
  private val isHttpIdentifier = "^http.*".r

  /**
   * Returns the part of `ident` that follows its last `/`.
   *
   * An identifier without any `/` is returned unchanged; an identifier that
   * ends with `/` yields the empty string.
   *
   * @param ident identifier of an institution or record set
   * @return the trailing path segment of `ident`
   */
  def getInstitutionOrRecordsetIdent(ident: String): String = {
    // lastIndexOf is -1 when there is no slash, so the start index becomes 0.
    val segmentStart = ident.lastIndexOf('/') + 1
    ident.substring(segmentStart)
  }
}

/**
 * Result of an extraction step: the extracted object together with the
 * warnings gathered while producing it.
 *
 * @param obj      the extracted object
 * @param warnings mutable buffer of warning messages; a fresh empty buffer is
 *                 created for every instance that does not pass one
 * @tparam T type of the extracted object
 */
case class ExtractionResult[T](
  obj: T,
  warnings: mutable.Buffer[String] = mutable.Buffer.empty[String]
)
283
284
285