/*
 * rico2edm
 * Copyright (C) 2021  UB Basel
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see https://www.gnu.org/licenses/.
 *
 */

package ch.memobase.rico2edm.edm

import ch.memobase.rico2edm.edm
import ch.memobase.rico2edm.edm.subjects.{Aggregation, Concept, ModelXMLTransformer, Place, ProvidedCHO, TimeSpan, WebResource}
import ch.memobase.rico2edm.utils.{ElasticSearchClientWrapper, Helper}

import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.{Success, Try}
import ujson.{Arr => JArr, Obj => JObj}

/**
 * Builds EDM subjects (ProvidedCHO, WebResource, Aggregation, ...) from the
 * JSON graph of a single record and hands the merged model over to
 * [[ModelXMLTransformer]].
 */
class EDM {

  //private val edmRdfModel = new EDMRdf4jModel

  //quite a lot of examples with datetimeformatter
  //https://www.dariawan.com/tutorials/java/java-datetimeformatter-tutorial-examples/
  //decide which is more appropriate for OAI
  //private val dateTimeFormatter = DateTimeFormatter.ISO_DATE
  private val dateTimeFormatter = DateTimeFormatter.ISO_INSTANT

  /**
   * Converts one incoming message into the pair (short record id, output of
   * [[ModelXMLTransformer]]).
   *
   * The models of the web resource and of the aggregation (when either could
   * be built) are merged into the model of the ProvidedCHO before the
   * transformation.
   *
   * @param messageValue the message as a string; it is handed to `Extractors.jsonGraph`
   * @return a `Success` holding the short record id and the transformed object,
   *         or a `Failure` when any of the mandatory extractions (graph,
   *         digital object, record, short record id) or any later step throws.
   *         The warnings buffer of the result is currently always empty.
   */
  def create(messageValue: String): Try[ExtractionResult[(String, String)]] = {
    Try {
      val graph = Extractors.jsonGraph(messageValue).get.arr
      val digitalObject = Extractors.digitalObject(graph).get
      // bind the record once; a missing record fails the surrounding Try
      val record = Extractors.record(graph).get
      //val recordId = Extractors.recordId(record).get
      val shortRecordId = Extractors.recordIdShort(graph)(record).get

      val choExtraction = createChoObject(graph, record, digitalObject)
      //actually only one instance - correct?
      val webExtraction = createWebResources(graph, record, digitalObject)
      //val placeExtraction: ExtractionResult[Place] = createPlace(graph,record,digitalObject)
      val aggregationExtraction = createAggregation(graph, record, digitalObject)
      //val conceptExtraction: ExtractionResult[Concept] = createConcept(graph,record,digitalObject)
      //val timespanExtraction: ExtractionResult[TimeSpan] = createTimeSpan(graph,record,digitalObject)

      // everything is collected in the model of the ProvidedCHO
      val choModel = choExtraction.obj.getModel
      webExtraction.obj.foreach(webResource => choModel.addAll(webResource.getModel))
      //choModel.addAll(placeExtraction.obj.getModel)
      aggregationExtraction.foreach(agg => choModel.addAll(agg.obj.getModel))
      //choModel.addAll(conceptExtraction.obj.getModel)
      //choModel.addAll(timespanExtraction.obj.getModel)

      val esObject = ModelXMLTransformer(
        model = choModel,
        id = shortRecordId,
        recordset = Extractors.recordSetOrInstitution(record)("isPartOf")
          .map(ident => EDM.getInstitutionOrRecordsetIdent(ident)),
        institution = Extractors.recordSetOrInstitution(record)("heldBy")
          .map(identInstitution => EDM.getInstitutionOrRecordsetIdent(identInstitution))
      )

      //todo: by now we do not collect any infos and warnings
      ExtractionResult((shortRecordId, esObject), new ArrayBuffer[String]())
    }
  }

  /**
   * Creates the ProvidedCHO of a record and fills it with the values the
   * extractors find in the graph / record.
   *
   * @param graph         the JSON graph of the message
   * @param record        the record node of the graph
   * @param digitalObject the digital object node (not used by this method)
   * @return the populated ProvidedCHO wrapped in an [[ExtractionResult]]
   * @note throws when `Extractors.recordId(record)` or
   *       `Extractors.physicalObject(graph)` yields no value (`.get`)
   */
  def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
    val cho = new ProvidedCHO(Extractors.recordId(record).get)

    //at first we are looking for main title in blank nodes
    //if this is not successful we try to use the title property in record
    val titlesFromBlankNodes = Extractors.resourceTitles(graph)(record.value)
    if (titlesFromBlankNodes.nonEmpty) {
      titlesFromBlankNodes.foreach(c => cho.addTitel(Some(c)))
    } else {
      Extractors.title(record).foreach(c => cho.addTitel(Some(c)))
    }

    cho.addDescription(Extractors.dctAbstract(record))
    cho.addDescription(Extractors.scopeAndContent(record))
    cho.addDescription(Extractors.descriptiveNote(record))

    //cho.addCreationDate(Extractors.creationDate(graph)(record.obj))
    //todo: only the name; c._1 will most likely be dropped!!!
    //todo: q.f.S. creator and contributor together with role??
    Extractors
      .resourceSourceOfRelationCreation(graph)(record.value)("creator")
      .foreach(c => cho.addCreator(Some(c)))

    //add contributors from two different sources
    Extractors
      .resourceSourceOfRelationCreation(graph)(record.value)("contributor")
      .foreach(c => cho.addContributor(Some(c)))
    Extractors
      .resourceContributorRDauP60441(graph)(record.value)
      .foreach(c => cho.addContributor(Some(c)))

    Extractors
      .identifiedBy(graph)(record.value)
      .foreach(c => cho.addIdentifier(Some(c)))

    Extractors
      .resourceAllLanguages(graph)(record.value)
      .foreach(c => cho.addLanguage(Helper.getLanguageCode(c)))

    Extractors
      .publishedByGH(graph)(record.value)
      .foreach(c => cho.addPublisher(Some(c)))

    Extractors
      .regulatedMultipleByHolder(graph)(record.value)
      .foreach(c => cho.addRights(Some(c)))

    Extractors
      .recordSource(record.value)
      .foreach(c => cho.addSource(Some(c)))

    Extractors
      .resourceSubjects(graph)(record.value)
      .foreach(c => cho.addSubject(Some(c)))

    Extractors
      .resourceGenres(graph)(record.value)
      .foreach(c => cho.addGenre(Some(c)))

    Extractors
      .dcTermsCreated(graph)(record.value)
      .foreach(c => cho.addDcTermsCreated(Some(c)))

    Extractors
      .resourceDCTermsExtent(Extractors.physicalObject(graph).get)
      .foreach(c => cho.addDcTermsExtent(Some(c)))

    Extractors
      .dcTermsIssuedTemporal(graph)(record.value)("issued")
      .foreach(c => cho.addDcTermsIssued(Some(c)))

    Extractors
      .resourceDCTermsMediumWikidata(graph)
      .foreach(c => cho.addDcTermsMedium(Some(c)))

    // both extractors feed the same dcterms:spatial property
    Extractors
      .dcTermsSpatial(graph)(record.value)
      .foreach(c => cho.addDcTermsSpatial(Some(c)))
    Extractors
      .dcTermsSpatialCapture(graph)(record.value)
      .foreach(c => cho.addDcTermsSpatial(Some(c)))

    Extractors
      .dcTermsIssuedTemporal(graph)(record.value)("temporal")
      .foreach(c => cho.addDcTermsTemporal(Some(c)))

    Extractors
      .edmType(record.value)
      .foreach(c => cho.addEdmType(Some(c)))

    // NOTE(review): the result type of the two lookups below is not visible in
    // this file, so the original `.map` call is kept untouched
    Extractors
      .recordSetOrInstitution(record.value)("heldBy")
      .foreach(c =>
        Helper
          .getInstitutionCoord(EDM.getInstitutionOrRecordsetIdent(c))
          .map(indexValue => cho.addCurrentLocation(Some(indexValue))))

    Extractors
      .recordSetOrInstitution(record.value)("isPartOf")
      .foreach(c =>
        ElasticSearchClientWrapper
          .getRecordsetName(EDM.getInstitutionOrRecordsetIdent(c))
          .map(indexValue => cho.addIsPartOf(Some(indexValue))))

    /*
    recordset = Extractors.recordSetOrInstitution(record.get)("isPartOf")
      .map( ident => EDM.getInstitutionOrRecordsetIdent(ident)),
     */

    ExtractionResult(cho)
  }

  /**
   * Creates the WebResource of the digital object, if an id for it can be
   * extracted, and adds usage regulations (rights) and the mime type (dc format).
   *
   * @param graph         the JSON graph of the message
   * @param record        the record node of the graph (not used by this method)
   * @param digitalObject the digital object node
   * @return an [[ExtractionResult]] holding `Some(webResource)`, or `None` when
   *         `Extractors.webResourceID` yields no id
   */
  def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Option[WebResource]] = {
    //how many digital objects are part of a document??
    //actually I suppose only one web-resource because we have only one digital object
    val webresource: Option[WebResource] =
      Extractors.webResourceID(graph)(digitalObject).map(id => WebResource(id))

    webresource.foreach { resource =>
      Extractors
        .usageRegulationDigitalObject(graph)(digitalObject.value)
        .foreach(c => resource.addRights(Option(c)))
      Extractors
        .mimeType(digitalObject)
        .foreach(format => resource.addDcFormat(Option(format)))
    }

    ExtractionResult(webresource)
  }

  /**
   * Creates a Place identified by the record id and adds the spatial values of
   * the record as skos:prefLabel. The call in [[create]] is commented out.
   *
   * @param graph         the JSON graph of the message
   * @param record        the record node of the graph
   * @param digitalObject the digital object node (not used by this method)
   * @return the Place wrapped in an [[ExtractionResult]]
   * @note throws when `Extractors.recordId(record)` yields no value (`.get`)
   */
  def createPlace(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Place] = {
    //how many places instances are available??
    val place = new Place(Extractors.recordId(record).get)
    Extractors
      .dcTermsSpatial(graph)(record.value)
      .foreach(c => place.addSkosPrefLabel(Some(c)))
    //val place: Option[List[WebResource]] = Extractors.recordId(record).map(id => List[Place](new Place(id)))
    ExtractionResult(place)
  }

  /**
   * Creates a Concept identified by the record id carrying a fixed test label.
   * The call in [[create]] is commented out.
   *
   * @param graph         the JSON graph of the message (not used by this method)
   * @param record        the record node of the graph
   * @param digitalObject the digital object node (not used by this method)
   * @return the Concept wrapped in an [[ExtractionResult]]
   * @note throws when `Extractors.recordId(record)` yields no value (`.get`)
   */
  def createConcept(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Concept] = {
    //how many places instances are available??
    //is it correct to use recordId
    val concept = new Concept(Extractors.recordId(record).get)
    //val concept = new Concept("http://iwerk.ch/1234")
    concept.addSkosPrefLabel(Some("only test for concept"))
    //val place: Option[List[WebResource]] = Extractors.recordId(record).map(id => List[Place](new Place(id)))
    ExtractionResult(concept)
  }

  /**
   * Creates the Aggregation of a record.
   *
   * The short record id is used as identifier, as edm:isShownAt and as
   * aggregated CHO. When the short id of the digital object can be determined
   * without an exception, isShownBy, the edm:object variant (depending on the
   * rico type "Foto") and - if exactly one usage regulation exists - the rights
   * are added. The provider is the fixed value "Memoriav".
   *
   * @param graph         the JSON graph of the message
   * @param record        the record node of the graph
   * @param digitalObject the digital object node
   * @return `Some(result)` with the Aggregation, or `None` when no short record
   *         id could be extracted
   */
  def createAggregation(graph: JArr, record: JObj, digitalObject: JObj): Option[ExtractionResult[Aggregation]] = {
    //how many places instances are available??
    //is it correct to use recordId
    //use apply method because we do not know how to build the identifier for Aggregations right now
    val recordIdShort = Extractors.recordIdShort(graph)(record)

    //todo (Aggregation_1): what happens if we cannot build an Aggregation?
    // - exception and the rico record is not delivered to Europeana??
    // (right now the result is simply None in that case)
    recordIdShort.map { shortId =>
      val aggregation = Aggregation(shortId)
      aggregation.addIsShownAt(recordIdShort)
      aggregation.addAggregatedCHO(recordIdShort)

      //todo: I think the implementation as a whole is not yet consistent:
      // do we always have a digital object
      // (do we keep the filter "hasNoDigitalObject" - export of pure metadata via OAI as well)?
      // If not (hence the Try here) this could be implemented
      // in a more consistent way
      Try {
        Extractors.digitalObjectIdShort(graph)(digitalObject)
      } match {
        case Success(digiObjIdShort) =>
          aggregation.addIsShownBy(digiObjIdShort)
          Extractors.ricoType(record).foreach { ricoType =>
            if (ricoType == "Foto") {
              aggregation.addEDMObjectFoto(digiObjIdShort)
            } else {
              aggregation.addEDMObjectNoFoto(digiObjIdShort)
            }
          }
          // rights are only set when there is exactly one usage regulation
          Extractors.usageRegulationDigitalObject(graph)(digitalObject.value) match {
            case head :: Nil => aggregation.addRights(Option(head))
            case _ =>
          }
        case _ => // best effort: without a digital object id these properties are skipped
      }

      //fixed value for provider
      aggregation.addProvider(Some("Memoriav"))

      // NOTE(review): the result type of getHeldBy is not visible in this file,
      // so the original `.map` call is kept untouched
      Extractors
        .recordSetOrInstitution(record.value)("heldBy")
        .foreach(c =>
          ElasticSearchClientWrapper
            .getHeldBy(EDM.getInstitutionOrRecordsetIdent(c))
            .map(indexValue => aggregation.addDataProvider(Some(indexValue))))

      ExtractionResult[Aggregation](aggregation)
    }
  }

  /**
   * Creates a TimeSpan identified by the record id carrying a fixed test label.
   * The call in [[create]] is commented out.
   *
   * @param graph         the JSON graph of the message (not used by this method)
   * @param record        the record node of the graph
   * @param digitalObject the digital object node (not used by this method)
   * @return the TimeSpan wrapped in an [[ExtractionResult]]
   * @note throws when `Extractors.recordId(record)` yields no value (`.get`)
   */
  def createTimeSpan(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[TimeSpan] = {
    //how many places instances are available??
    //is it correct to use recordId
    val timespan = new TimeSpan(Extractors.recordId(record).get)
    //val timespan = new TimeSpan("http://iwerk.ch/891011")
    timespan.addSkosPrefLabel(Some("only test for timespan"))
    //val place: Option[List[WebResource]] = Extractors.recordId(record).map(id => List[Place](new Place(id)))
    ExtractionResult(timespan)
  }

  /*
  todo: my assumption: recordset and institution have cardinality n
   */
  //def extractRecordset(graph: JArr, record: JObj): List[String] = ???

  //def extractInstitution(graph: JArr, record: JObj): List[String] = ???

}

object EDM {

  private val isHttpIdentifier = "^http.*".r

  /**
   * Returns the part of `ident` that follows the last `/`.
   *
   * An identifier without any `/` is returned unchanged; an identifier ending
   * in `/` yields the empty string.
   *
   * @param ident the full identifier of an institution or record set
   * @return the last path segment of `ident`
   */
  def getInstitutionOrRecordsetIdent(ident: String): String =
    ident.substring(ident.lastIndexOf("/") + 1)

}

/**
 * Result of an extraction step together with the warnings collected on the way.
 *
 * @param obj      the extracted / created object
 * @param warnings warnings collected during the extraction; a fresh empty buffer by default
 * @tparam T type of the extracted object
 */
case class ExtractionResult[T](obj: T, warnings: mutable.Buffer[String] = mutable.Buffer())