Commit cb716f4a authored by Günter Hipler's avatar Günter Hipler
Browse files

next transformations for CHO plus recordset and institution information for ES record

parent 2d994b07
Pipeline #22975 passed with stages
in 7 minutes and 40 seconds
......@@ -38,6 +38,38 @@ Ich finde bei uns nur literal
da noch mehr aufräumen - ich klebe zuviel zusammen
s. testcase CHOSpec "collect alternatives dcterms:created"
#### dcTerms:extent
kann es immer nur ein physicalObject geben?
@Silvia: laut Deiner Spezifikation rico->edm hat es Kardinalität von n
@Sebastian: für IIIFManifestCreator nutzt sein Extractor collectfirst. Es wird also immer nur eine Instanz verwendet
Ist das so beabsichtigt?
Dieses Prinzip setzt sich in IIIF fort. In der Regel wird nicht auf einer Liste von properties, sondern auf singulären Instanzen gearbeitet
#### dcterms:isPartOf
hier muss ich mir die Zusammenhänge zwischen record und recordset noch genauer ansehen
und mit Daniel/Silvia abstimmen, wie diese Zusammenhänge nach Europeana übertragen werden können
#### edm:currentLocation
## offene Fragen
- kann ein record immer nur auf eine institution bezogen sein oder auf mehrere?
s. dazu EDM recordset und institution Erstellung
--> davon hängt ab, wie ich auf die Objekte zugreife
- für Silvia sind auch noch Punkte offen
- jetzt nutze ich isHeldBy und isPartof für institution und recordset
## allgemeine Fragen
welche Werte sind beim Prototyp für institution und recordset relevant?
ich habe im Kopf:
institution: ati
recordset: ati-002
......@@ -22,11 +22,13 @@ package ch.memobase.edm
import ch.memobase.edm.subjects.{ModelXMLTransformer, ProvidedCHO, WebResource}
import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Try
import ujson.{Arr => JArr, Obj => JObj}
import scala.jdk.CollectionConverters._
class EDM {
......@@ -43,11 +45,11 @@ class EDM {
val graph = Extractors.jsonGraph(messageValue).get.arr
val digitalObject = Extractors.digitalObject(graph).get
val record = Extractors.record(graph).get
val recordId = Extractors.recordId(record).get
val record = Extractors.record(graph)
val recordId = Extractors.recordId(record.get).get
val choExtraction = createChoObject(graph,record,digitalObject)
val webExtraction = createWebResources(graph,record,digitalObject)
val choExtraction = createChoObject(graph,record.get,digitalObject)
val webExtraction = createWebResources(graph,record.get,digitalObject)
webExtraction.obj.foreach(webResource =>
choExtraction.obj.getModel.addAll(webResource.getModel)
......@@ -55,10 +57,12 @@ class EDM {
val esObject = ModelXMLTransformer(model = choExtraction.obj.getModel,
id = recordId,
recordset = "ati-002",
institution = "ati")
recordset = Extractors.recordSetOrInstitution(record)("isPartOf")
.map( ident => EDM.getInstitutionOrRecordsetIdent(ident)),
institution = Extractors.recordSetOrInstitution(record)("heldBy")
.map( identInstitution => EDM.getInstitutionOrRecordsetIdent(identInstitution))
)
//println(esObject)
val result = ExtractionResult((
recordId,
......@@ -71,7 +75,7 @@ class EDM {
}
}
private def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
val cho = new ProvidedCHO(Extractors.recordId(record).get)
......@@ -139,12 +143,30 @@ class EDM {
.dcTermsCreated(graph)(record.value)
.foreach(c => cho.addDcTermsCreated(Some(c)))
Extractors.resourceDCTermsExtent(Extractors.physicalObject(graph).get)
.foreach(c => cho.addDcTermsExtent(Some(c)))
Extractors
.dcTermsIssuedTemporal(graph)(record.value)("issued")
.foreach(c => cho.addDcTermsIssued(Some(c)))
Extractors.resourceDCTermsMedium(graph)
.foreach(c => cho.addDcTermsMedium(Some(c)))
Extractors.dcTermsSpatial(graph)(record.value)
.foreach(c => cho.addDcTermsSpatial(Some(c)))
Extractors.dcTermsIssuedTemporal(graph)(record.value)("temporal")
.foreach(c => cho.addDcTermsTemporal(Some(c)))
ExtractionResult(cho)
}
private def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[List[WebResource]] = {
def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[List[WebResource]] = {
//how many digital objects are part of a document??
val webresource: Option[List[WebResource]] = Extractors.dobjectId(digitalObject).map(id => List[WebResource](new WebResource(id)))
......@@ -152,6 +174,20 @@ class EDM {
}
/*
todo: my assumption: recordset and institution have cardinality n
*/
// Placeholder: not implemented yet. `???` throws scala.NotImplementedError when this method is called.
def extractRecordset(graph: JArr, record: JObj): List[String] = ???
// Placeholder: not implemented yet. `???` throws scala.NotImplementedError when this method is called.
def extractInstitution(graph: JArr, record: JObj): List[String] = ???
}
object EDM {
  private val isHttpIdentifier = "^http.*".r

  /**
   * Returns the last path segment of an identifier, i.e. the text after the final `/`.
   *
   * Example: `https://memobase.ch/institution/rts` yields `rts`. Trailing slashes are
   * ignored, so `https://memobase.ch/institution/rts/` also yields `rts` instead of an
   * empty string. An identifier without any `/` is returned unchanged.
   *
   * @param ident full identifier (typically an http(s) URI)
   * @return the short identifier taken from the end of `ident`
   */
  def getInstitutionOrRecordsetIdent(ident: String): String = {
    // Strip trailing slashes first; otherwise lastIndexOf would point at the very end
    // and the extracted segment would be empty.
    val withoutTrailingSlash = ident.replaceAll("/+$", "")
    withoutTrailingSlash.substring(withoutTrailingSlash.lastIndexOf('/') + 1)
  }
}
/**
 * Wrapper carrying a value together with a buffer of warning strings.
 *
 * @param obj      the wrapped value
 * @param warnings mutable buffer of warning messages; defaults to a new, empty buffer
 */
case class ExtractionResult[T](obj: T, warnings: mutable.Buffer[String] = mutable.Buffer())
......
......@@ -18,6 +18,7 @@
package ch.memobase.edm
import org.dmfs.jems.predicate.elementary.Nothing
import ujson.Value.{Value => JValue}
import ujson.{Arr => JArr, Obj => JObj, Str => JStr}
......@@ -44,6 +45,22 @@ object Extractors {
}.get
}
/**
 * Collects every node of the JSON graph whose `type` or `@type` entry equals `resourceType`
 * and returns the key/value map of each matching node as a list.
 *
 * The whole traversal runs inside a `Try`, so an exception raised while inspecting any node
 * makes the complete result a `Failure`.
 * NOTE(review): presumably `.obj` / `.str` throw on nodes that are not objects or whose type
 * value is not a plain string (e.g. an array of types) - confirm against ujson.
 */
private val fedoraResourceCollection =
(jsonGraph: JArr) =>
(resourceType: String) =>
Try {
jsonGraph.value.collect {
case res
// a node matches when either the plain `type` key or the JSON-LD `@type` key carries the requested type
if (res.obj
.contains("type") && res.obj("type").str == resourceType) ||
(res.obj.contains("@type") && res
.obj("@type")
.str == resourceType) =>
res.obj.value
}.toList
}
private val getBlankNodeContent = (graph: JArr) =>
(resource: mutable.LinkedHashMap[String, JValue]) =>
(property: String) =>
......@@ -85,15 +102,40 @@ object Extractors {
/** Looks up the graph node of resource type `digitalObject` by delegating to `fedoraResource`. */
val digitalObject: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
  graph => fedoraResource(graph)("digitalObject")
/** Looks up all graph nodes of resource type `physicalObject` by delegating to `fedoraResourceCollection`. */
val physicalObject: JArr => Try[List[mutable.LinkedHashMap[String, JValue]]] =
  graph => fedoraResourceCollection(graph)("physicalObject")
/** Looks up the graph node typed as RiC-O `Record` by delegating to `fedoraResource`. */
val record: JArr => Try[mutable.LinkedHashMap[String, JValue]] = { graph =>
  val recordType = "https://www.ica.org/standards/RiC/ontology#Record"
  fedoraResource(graph)(recordType)
}
/*val recordSet: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
jsonGraph => fedoraResource(jsonGraph)("recordSet")*/
// Matches strings that start with "http"; used to keep only URI-like identifiers.
private val isHttpIdentifier = "^http.*".r

/**
 * Reads `property` from the given record and returns those of its values that start with
 * "http".
 *
 * The property may hold either a single string or an array. Array entries that are not
 * plain strings are skipped, and any other shape yields an empty list.
 *
 * Note: the record is unwrapped with `Try.get`, so a failed record lookup is rethrown here.
 *
 * @return curried function: record lookup result => property name => matching identifiers
 */
val recordSetOrInstitution: Try[mutable.LinkedHashMap[String, JValue]] => String => List[String] =
  record =>
    property => {
      // Unwrap once instead of on every access.
      val resource = record.get
      val idents: List[String] =
        stringValue(resource)(property)
          .map(List(_))
          .orElse(
            arrayValue(resource)(property)
              // strOpt skips non-string entries instead of throwing on them
              .map(values => values.flatMap(_.strOpt).toList)
          )
          .getOrElse(List.empty[String])
      // TODO: this is currently an assumption - heldBy is expected to contain either a
      //  literal or a list of literals of type identifier
      idents.filter(isHttpIdentifier.matches(_))
    }
// Returns the value stored under `key` when it is present and a JSON string, otherwise None.
private val stringValue = (json: JObj) =>
  (key: String) => json.value.get(key).flatMap(_.strOpt)
// Returns the value stored under `key` when it is present and a JSON array, otherwise None.
private val arrayValue = (json: JObj) =>
  (key: String) => json.value.get(key).flatMap(_.arrOpt)
// Reads the string stored under `key` and converts it with `toDouble`.
// None when the key is absent or not a string; `toDouble` throws on non-numeric text.
private val numValue = (json: JObj) =>
  (key: String) => stringValue(json)(key).map(_.toDouble)
......@@ -257,6 +299,43 @@ object Extractors {
}
).toList
/**
 * Collects the `duration` string of every given physical object.
 * Objects without a string-valued `duration` contribute nothing to the result.
 */
val resourceDCTermsExtent
  : List[mutable.LinkedHashMap[String, JValue]] => List[String] =
  physicalObjects =>
    physicalObjects.flatMap { physicalObject =>
      stringValue(physicalObject)("duration")
    }
/*
What happens here:
Unlike other implementations, this function is applied to the graph only and not to the
graph plus a further object (here that would not be the record, as is so often the case,
but the physical object).
Therefore the function has to search the graph itself for physical objects that carry the
desired property (filter function).
For the resulting list of physical objects with the property hasCarrierType, a further
extractor (Extractors.getAllBlankNodeContent) looks up, for each physical object, the blank
nodes referenced by hasCarrierType.
Because getAllBlankNodeContent returns an iterator (list), flatMap is needed again.
On every blank node found (objectWithCarrierType) the utility function stringValue is then
used to fetch the object (literal) of the property name.
Because stringValue yields an Option, flatMap is needed here as well.
Whether it would be better for the client to look up the physical objects itself (as with
Extractors.resourceDCTermsExtent) and apply the next extractor to that list is somewhat a
matter of taste...
*/
val resourceDCTermsMedium
: JArr => List[String] =
graph =>
// `.get` rethrows if the physical-object lookup failed
Extractors.physicalObject(graph).get.arr
.filter(_.obj.contains("hasCarrierType"))
.flatMap((physicalObject: JValue) =>
Extractors.getAllBlankNodeContent(graph)(physicalObject.obj)("hasCarrierType"))
.flatMap(objectWithCarrierType => stringValue(objectWithCarrierType.obj)("name"))
.toList
val dcTermsCreated
: JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
graph =>
......@@ -299,6 +378,44 @@ object Extractors {
).toList
/**
 * Returns one date-related string for the given record property (the visible caller passes
 * "issued" and "temporal").
 *
 * The nodes delivered by `getAllBlankNodeContent` are scanned in order; the first node that
 * contains any of the keys below decides the result. Within that node the keys are tried in
 * this priority: `normalizedDateValue`, `expressedDate`, `certainty`, `dateQualifier`.
 * Yields None when no node contains any of these keys.
 *
 * NOTE(review): a node that only carries `certainty` or `dateQualifier` makes that value the
 * result - confirm this is intended for a date field.
 */
val dcTermsIssuedTemporal
: JArr => mutable.LinkedHashMap[String, JValue] => String => Option[String] =
graph =>
record =>
property =>
getAllBlankNodeContent(graph)(record)(property)
.collectFirst {
// case order defines the key priority inside a single node
case nV if nV.contains("normalizedDateValue") => nV("normalizedDateValue").str
case eD if eD.contains("expressedDate") => eD("expressedDate").str
case certainty if certainty.contains("certainty") => certainty("certainty").str
case dQ if dQ.contains("dateQualifier") => dQ("dateQualifier").str
}
/**
 * Collects the `name` of every node referenced by the record's `spatial` property that is
 * typed as RiC-O `Place` and has a `name` entry.
 */
val dcTermsSpatial
  : JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
  graph =>
    record => {
      val placeType = "https://www.ica.org/standards/RiC/ontology#Place"
      getAllBlankNodeContent(graph)(record)("spatial")
        .filter(node =>
          node.contains("@type") &&
            node("@type").str == placeType &&
            node.contains("name"))
        .map(node => node("name").str)
        .toList
    }
/**
 * Returns, as a list of at most one element, the `name` of the first node referenced by the
 * record's `temporal` property that is typed as RiC-O `Place` and has a `name` entry.
 *
 * NOTE(review): the filter on the `Place` type mirrors `dcTermsSpatial`; confirm that this is
 * the intended type for `temporal`. The visible caller uses `dcTermsIssuedTemporal` for
 * "temporal" instead of this extractor.
 */
val dcTermsTemporal
  : JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
  graph =>
    record => {
      val placeType = "https://www.ica.org/standards/RiC/ontology#Place"
      getAllBlankNodeContent(graph)(record)("temporal")
        .find(node =>
          node.contains("@type") &&
            node("@type").str == placeType &&
            node.contains("name"))
        .map(node => node("name").str)
        .toList
    }
val resourceTitles
......
......@@ -36,8 +36,8 @@ object ModelXMLTransformer {
model: Model,
id: String,
recordset: String,
institution: String,
recordset: List[String],
institution: List[String],
published: Boolean = true,
format: String = "EDM",
): String = {
......
......@@ -77,7 +77,20 @@ class ProvidedCHO (val id: String) {
/** Adds the statement (iri(id), DCTERMS.CREATED, literal) to the model when a value is given; does nothing for None. */
def addDcTermsCreated(dctermsCreated:Option[String]): Unit =
  // foreach instead of map: the call is made for its side effect only
  dctermsCreated.foreach(t => model.add(iri(id),DCTERMS.CREATED,factory.createLiteral(t)))
/** Adds the statement (iri(id), DCTERMS.EXTENT, literal) to the model when a value is given; does nothing for None. */
def addDcTermsExtent(dctermsExtent:Option[String]): Unit =
  // foreach instead of map: the call is made for its side effect only
  dctermsExtent.foreach(t => model.add(iri(id),DCTERMS.EXTENT,factory.createLiteral(t)))
/** Adds the statement (iri(id), DCTERMS.ISSUED, literal) to the model when a value is given; does nothing for None. */
def addDcTermsIssued(dctermsIssued:Option[String]): Unit =
  // foreach instead of map: the call is made for its side effect only
  dctermsIssued.foreach(t => model.add(iri(id),DCTERMS.ISSUED,factory.createLiteral(t)))
/** Adds the statement (iri(id), DCTERMS.MEDIUM, literal) to the model when a value is given; does nothing for None. */
def addDcTermsMedium(medium:Option[String]): Unit =
  // foreach instead of map: the call is made for its side effect only
  medium.foreach(t => model.add(iri(id),DCTERMS.MEDIUM,factory.createLiteral(t)))
/** Adds the statement (iri(id), DCTERMS.SPATIAL, literal) to the model when a value is given; does nothing for None. */
def addDcTermsSpatial(spatial:Option[String]): Unit =
  // foreach instead of map: the call is made for its side effect only
  spatial.foreach(t => model.add(iri(id),DCTERMS.SPATIAL,factory.createLiteral(t)))
/** Adds the statement (iri(id), DCTERMS.TEMPORAL, literal) to the model when a value is given; does nothing for None. */
def addDcTermsTemporal(temporal:Option[String]): Unit =
  // foreach instead of map: the call is made for its side effect only
  temporal.foreach(t => model.add(iri(id),DCTERMS.TEMPORAL,factory.createLiteral(t)))
/** Returns the `model` that the add* methods of this class write their statements to. */
def getModel: Model = model
......
......@@ -786,7 +786,10 @@
"_:b71"
],
"hasTitle": "_:b36",
"heldBy": "https://memobase.ch/institution/rts",
"heldBy": ["https://memobase.ch/institution/rts",
"https://memobase.ch/institution/nurEinTest",
"_:b76",
"blanknode und dies werden nicht berücksichtigt"],
"identifiedBy": [
"_:b67",
"_:b84",
......
{
"@graph": [
{
"@id": "_:b0",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "Radio_Stadtfilter-2893",
"type": "oldMemobase"
},
{
"@id": "_:b1",
"@type": "https://www.ica.org/standards/RiC/ontology#CorporateBody",
"eventType": "CREATE",
"name": "Kunsthalle Winterthur"
},
{
"@id": "_:b10",
"@type": "http://www.w3.org/2004/02/skos/core#Concept",
"prefLabel": "Kind"
},
{
"@id": "_:b11",
"@type": "https://www.ica.org/standards/RiC/ontology#CreationRelation",
"creationRelationHasSource": "https://memobase.ch/record/raf-001-2893",
"creationRelationHasTarget": "_:b12",
"name": "Moderator, Redakteur",
"type": "contributor"
},
{
"@id": "_:b12",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Luca",
"lastName": "Santarossa",
"agentIsTargetOfCreationRelation": "_:b11",
"name": "Santarossa, Luca"
},
{
"@id": "_:b13",
"@type": "https://www.ica.org/standards/RiC/ontology#DateRange",
"normalizedDateValue": "2012"
},
{
"@id": "_:b14",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Serap",
"lastName": "Çileli",
"name": "Çileli, Serap"
},
{
"@id": "_:b15",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Kurt",
"lastName": "Aeschbacher",
"name": "Aeschbacher, Kurt"
},
{
"@id": "_:b16",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"name": "Winterthur"
},
{
"@id": "_:b17",
"@type": "https://www.ica.org/standards/RiC/ontology#Rule",
"name": "onsite",
"regulates": "https://memobase.ch/physical/raf-001-2893-1",
"type": "access"
},
{
"@id": "_:b18",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "raf-001-2893-1",
"type": "main"
},
{
"@id": "_:b19",
"@type": "https://www.ica.org/standards/RiC/ontology#Rule",
"sameAs": "http://rightsstatements.org/vocab/CNE/1.0/",
"name": "Copyright Not Evaluated (CNE)",
"regulates": "https://memobase.ch/physical/raf-001-2893-1",
"type": "usage"
},
{
"@id": "_:b2",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Olivier",
"lastName": "Keller",
"agentIsTargetOfCreationRelation": "_:b3",
"name": "Keller, Olivier"
},
{
"@id": "_:b20",
"@type": "https://www.ica.org/standards/RiC/ontology#CarrierType",
"name": "mp3"
},
{
"@id": "_:b21",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "201212071800-Output",
"type": "callNumber"
},
{
"@id": "_:b22",
"@type": "https://www.ica.org/standards/RiC/ontology#CreationRelation",
"creationRelationHasSource": "https://memobase.ch/record/raf-001-2893",
"creationRelationHasTarget": "_:b23",
"name": "Author",
"type": "creator"
},
{
"@id": "_:b23",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Luca",
"lastName": "Santarossa",
"agentIsTargetOfCreationRelation": "_:b22",
"name": "Santarossa, Luca"
},
{
"@id": "_:b24",
"@type": "https://www.ica.org/standards/RiC/ontology#Title",
"title": "A Hard Day's Evening",
"type": "series"
},
{
"@id": "_:b25",
"@type": "https://www.ica.org/standards/RiC/ontology#Rule",
"name": "sf",
"regulates": "https://memobase.ch/record/raf-001-2893",
"type": "holder"
},
{
"@id": "_:b26",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "raf-001-2893-1",
"type": "main"
},
{
"@id": "_:b27",
"@type": "https://www.ica.org/standards/RiC/ontology#Rule",
"name": "public",
"regulates": "https://memobase.ch/digital/raf-001-2893-1",
"type": "access"
},
{
"@id": "_:b28",
"@type": "https://www.ica.org/standards/RiC/ontology#Rule",
"sameAs": "http://rightsstatements.org/vocab/CNE/1.0/",
"name": "Copyright Not Evaluated (CNE)",
"regulates": "https://memobase.ch/digital/raf-001-2893-1",
"type": "usage"
},
{
"@id": "_:b29",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Gabriela",
"lastName": "Sperl",
"name": "Sperl, Gabriela"
},
{
"@id": "_:b3",
"@type": "https://www.ica.org/standards/RiC/ontology#CreationRelation",
"creationRelationHasSource": "https://memobase.ch/record/raf-001-2893",
"creationRelationHasTarget": "_:b2",
"name": "Redakteur",
"type": "contributor"
},
{
"@id": "_:b30",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Hank",
"lastName": "Stroke",
"name": "Stroke, Hank"
},
{
"@id": "_:b31",
"@type": "http://www.w3.org/2004/02/skos/core#Concept",
"prefLabel": "Gesellschaft"
},
{
"@id": "_:b32",
"@type": "https://www.ica.org/standards/RiC/ontology#Title",
"title": "A Hard Day's Evening vom 07.12.2012",
"type": "broadcast"
},
{
"@id": "_:b33",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "raf-001-2893",
"type": "main"
},
{
"@id": "_:b34",
"@type": "http://www.w3.org/2004/02/skos/core#Concept",
"prefLabel": "Schweiz"
},
{
"@id": "_:b35",
"@type": "https://www.ica.org/standards/RiC/ontology#CorporateBody",
"name": "Verena von Horsten"
},
{
"@id": "_:b36",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Anatole",
"lastName": "Taubmann",
"name": "Taubmann, Anatole"
},
{
"@id": "_:b37",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"name": "Zürich"
},
{
"@id": "_:b38",
"@type": "https://www.ica.org/standards/RiC/ontology#Person",
"firstName": "Brigitte",
"lastName": "Voss-Balzarini",
"name": "Voss-Balzarini, Brigitte"
},
{
"@id": "_:b39",
"@type": "http://www.w3.org/2004/02/skos/core#Concept",
"prefLabel": "Aktualität"
},
{
"@id": "_:b4",
"@type": "https://www.ica.org/standards/RiC/ontology#Activity",
"affects": "_:b6",
"beginningDate": "2021-13-16T17:13:05+0000",
"endDate": "2021-13-16T17:13:05+0000",
"performedBy": "_:b5",
"resultsIn": "_:b7",
"type": "enrichment"