Commit cb716f4a authored by Günter Hipler's avatar Günter Hipler
Browse files

next transformations for CHO plus recordset and institution information for ES record

parent 2d994b07
Pipeline #22975 passed with stages
in 7 minutes and 40 seconds
......@@ -38,6 +38,38 @@ Ich finde bei uns nur literal
da noch mehr aufräumen - ich klebe zuviel zusammen
s. testcase CHOSpec "collect alternatives dcterms:created"
#### dcTerms:extent
kann es immer nur ein physicalObject geben?
@Silvia: laut Deiner Spezifikation rico->edm hat es Kardinalität von n
@Sebastian: für den IIIFManifestCreator nutzt sein Extractor collectFirst. Es wird also immer nur eine Instanz verwendet
Ist das so beabsichtigt?
Dieses Prinzip setzt sich in IIIF fort. In der Regel wird nicht auf einer Liste von properties sondern singulären Instanzen gearbeitet
#### dcterms:isPartOf
hier muss ich mir die Zusammenhänge zwischen record und recordset noch genauer ansehen
und mit Daniel/Silvia abstimmen, wie diese Zusammenhänge nach Europeana übertragen werden können
#### edm:currentLocation
## offene Fragen
- kann ein record immer nur auf eine institution bezogen sein oder auf mehrere?
s. dazu EDM recordset und institution Erstellung
--> davon hängt ab, wie ich auf die Objekte zugreife
- für Silvia sind auch noch Punkte offen
- jetzt nutze ich isHeldBy und isPartOf für institution und recordset
## allgemeine Fragen
welche Werte sind beim Prototyp für institution und recordset relevant?
ich habe im Kopf:
institution: ati
recordset: ati-002
......@@ -22,11 +22,13 @@ package ch.memobase.edm
import ch.memobase.edm.subjects.{ModelXMLTransformer, ProvidedCHO, WebResource}
import java.time.format.DateTimeFormatter
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Try
import ujson.{Arr => JArr, Obj => JObj}
import scala.jdk.CollectionConverters._
class EDM {
......@@ -43,11 +45,11 @@ class EDM {
val graph = Extractors.jsonGraph(messageValue).get.arr
val digitalObject = Extractors.digitalObject(graph).get
val record = Extractors.record(graph).get
val recordId = Extractors.recordId(record).get
val record = Extractors.record(graph)
val recordId = Extractors.recordId(record.get).get
val choExtraction = createChoObject(graph,record,digitalObject)
val webExtraction = createWebResources(graph,record,digitalObject)
val choExtraction = createChoObject(graph,record.get,digitalObject)
val webExtraction = createWebResources(graph,record.get,digitalObject)
webExtraction.obj.foreach(webResource =>
choExtraction.obj.getModel.addAll(webResource.getModel)
......@@ -55,10 +57,12 @@ class EDM {
val esObject = ModelXMLTransformer(model = choExtraction.obj.getModel,
id = recordId,
recordset = "ati-002",
institution = "ati")
recordset = Extractors.recordSetOrInstitution(record)("isPartOf")
.map( ident => EDM.getInstitutionOrRecordsetIdent(ident)),
institution = Extractors.recordSetOrInstitution(record)("heldBy")
.map( identInstitution => EDM.getInstitutionOrRecordsetIdent(identInstitution))
)
//println(esObject)
val result = ExtractionResult((
recordId,
......@@ -71,7 +75,7 @@ class EDM {
}
}
private def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
val cho = new ProvidedCHO(Extractors.recordId(record).get)
......@@ -139,12 +143,30 @@ class EDM {
.dcTermsCreated(graph)(record.value)
.foreach(c => cho.addDcTermsCreated(Some(c)))
Extractors.resourceDCTermsExtent(Extractors.physicalObject(graph).get)
.foreach(c => cho.addDcTermsExtent(Some(c)))
Extractors
.dcTermsIssuedTemporal(graph)(record.value)("issued")
.foreach(c => cho.addDcTermsIssued(Some(c)))
Extractors.resourceDCTermsMedium(graph)
.foreach(c => cho.addDcTermsMedium(Some(c)))
Extractors.dcTermsSpatial(graph)(record.value)
.foreach(c => cho.addDcTermsSpatial(Some(c)))
Extractors.dcTermsIssuedTemporal(graph)(record.value)("temporal")
.foreach(c => cho.addDcTermsTemporal(Some(c)))
ExtractionResult(cho)
}
private def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[List[WebResource]] = {
def createWebResources(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[List[WebResource]] = {
//how many digital objects are part of a document??
val webresource: Option[List[WebResource]] = Extractors.dobjectId(digitalObject).map(id => List[WebResource](new WebResource(id)))
......@@ -152,6 +174,20 @@ class EDM {
}
/*
todo: my assumption: recordset and institution have cardinality n
*/
/**
 * Placeholder for extracting the record set identifiers of a record.
 *
 * Not implemented yet: the body is `???`, so any call throws `scala.NotImplementedError`.
 *
 * @param graph  the JSON graph of the message
 * @param record the record resource
 * @return a list of identifiers (list-valued per the cardinality assumption in the todo above)
 */
def extractRecordset(graph: JArr, record: JObj): List[String] = ???
/**
 * Placeholder for extracting the institution identifiers of a record.
 *
 * Not implemented yet: the body is `???`, so any call throws `scala.NotImplementedError`.
 *
 * @param graph  the JSON graph of the message
 * @param record the record resource
 * @return a list of identifiers
 */
def extractInstitution(graph: JArr, record: JObj): List[String] = ???
}
object EDM {

  private val isHttpIdentifier = "^http.*".r

  /**
   * Returns the short identifier of an institution or record set URI, i.e. the text
   * after the last `/` (for example `https://memobase.ch/institution/rts` gives `rts`).
   *
   * Trailing slashes are ignored, so `.../recordSet/rts-002/` gives `rts-002` rather
   * than an empty string. A value without any `/` is returned unchanged.
   *
   * @param ident the full identifier, typically an http(s) URI
   * @return the last non-empty path segment, or an empty string if there is none
   */
  def getInstitutionOrRecordsetIdent(ident: String): String = {
    // Drop trailing slashes first, otherwise the segment after the last '/' would be empty.
    val withoutTrailingSlash = ident.replaceAll("/+$", "")
    withoutTrailingSlash.substring(withoutTrailingSlash.lastIndexOf('/') + 1)
  }
}
/**
 * Pairs an extracted value with the warnings collected while producing it.
 *
 * @param obj      the extracted value
 * @param warnings mutable buffer of warning messages; empty by default
 */
case class ExtractionResult[T](obj: T, warnings: mutable.Buffer[String] = mutable.Buffer())
......
......@@ -18,6 +18,7 @@
package ch.memobase.edm
import org.dmfs.jems.predicate.elementary.Nothing
import ujson.Value.{Value => JValue}
import ujson.{Arr => JArr, Obj => JObj, Str => JStr}
......@@ -44,6 +45,22 @@ object Extractors {
}.get
}
// Collects every node of the graph whose "type" or "@type" equals the requested
// resource type. Any exception raised while inspecting the nodes ends up in the Failure.
private val fedoraResourceCollection =
  (jsonGraph: JArr) =>
    (resourceType: String) =>
      Try {
        jsonGraph.value
          .filter { node =>
            val fields = node.obj
            // "type" is checked before "@type", as in the single-resource lookup order
            (fields.contains("type") && fields("type").str == resourceType) ||
            (fields.contains("@type") && fields("@type").str == resourceType)
          }
          .map(matching => matching.obj.value)
          .toList
      }
private val getBlankNodeContent = (graph: JArr) =>
(resource: mutable.LinkedHashMap[String, JValue]) =>
(property: String) =>
......@@ -85,15 +102,40 @@ object Extractors {
/** Looks up the resource of type "digitalObject" in the graph. */
val digitalObject: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
  graph => fedoraResource(graph)("digitalObject")
/** Looks up all resources of type "physicalObject" in the graph. */
val physicalObject: JArr => Try[List[mutable.LinkedHashMap[String, JValue]]] =
  graph => fedoraResourceCollection(graph)("physicalObject")
/** Looks up the resource typed as a RiC-O Record in the graph. */
val record: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
  graph => fedoraResource(graph)("https://www.ica.org/standards/RiC/ontology#Record")
/*val recordSet: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
jsonGraph => fedoraResource(jsonGraph)("recordSet")*/
private val isHttpIdentifier = "^http.*".r

/**
 * Reads the identifiers stored under `property` (e.g. "heldBy" or "isPartOf") of a record
 * and keeps only those that start with "http".
 *
 * The property may hold either a single string or an array. Array elements that are not
 * JSON strings are skipped; string elements that are not http identifiers (blank node ids,
 * free text) are filtered out.
 *
 * Throws the wrapped exception if `record` is a Failure, because the Try is unwrapped here.
 */
val recordSetOrInstitution: Try[mutable.LinkedHashMap[String, JValue]] => String => List[String] =
  record =>
    property => {
      // Unwrap once instead of on every lookup.
      val resource = record.get
      // TODO: this is an assumption for now - the property is expected to hold either a
      //  literal or a list of literals of type identifier.
      val idents = stringValue(resource)(property)
        .map(single => List(single))
        .orElse(
          arrayValue(resource)(property)
            .map(values => values.flatMap(value => value.strOpt).toList)
        )
        .getOrElse(List.empty[String])
      idents.filter(isHttpIdentifier.matches(_))
    }
// Value stored under `valueKey` if it is present and a JSON string.
private val stringValue = (jsonObj: JObj) =>
  (valueKey: String) =>
    for {
      raw  <- jsonObj.value.get(valueKey)
      text <- raw.strOpt
    } yield text

// Elements stored under `valueKey` if it is present and a JSON array.
private val arrayValue = (jsonObj: JObj) =>
  (valueKey: String) =>
    for {
      raw   <- jsonObj.value.get(valueKey)
      items <- raw.arrOpt
    } yield items

// String value under `valueKey` parsed as a Double; parsing a non-numeric string throws.
private val numValue = (jsonObj: JObj) =>
  (valueKey: String) => stringValue(jsonObj)(valueKey).map(_.toDouble)
......@@ -257,6 +299,43 @@ object Extractors {
}
).toList
/**
 * Collects the "duration" string of every given physical object that has one.
 * Objects without a string-valued "duration" contribute nothing to the result.
 */
val resourceDCTermsExtent: List[mutable.LinkedHashMap[String, JValue]] => List[String] =
  physicalObjects =>
    physicalObjects.flatMap(physicalObject => stringValue(physicalObject)("duration"))
/*
What happens here:
Unlike other extractors, this function is applied to the graph only, not to the graph plus
a further object (which here would be the physical object rather than, as so often, the record).
The function therefore has to search the graph itself for physical objects that carry the
wanted property (the filter step).
For every physical object found with the property hasCarrierType, another extractor
(Extractors.getAllBlankNodeContent) looks up the nodes referenced by hasCarrierType.
getAllBlankNodeContent returns a collection, hence the flatMap.
On every node found (objectWithCarrierType) the utility function stringValue fetches the
literal stored under the property "name".
stringValue returns an Option, hence the second flatMap.
Whether it would be better to let the client look up the physical objects first (as for
Extractors.resourceDCTermsExtent) and apply the next extractor to that list is largely a
matter of taste.
Note: the `.get` on the physical object lookup throws if that lookup failed.
*/
val resourceDCTermsMedium
: JArr => List[String] =
graph =>
Extractors.physicalObject(graph).get.arr
.filter(_.obj.contains("hasCarrierType"))
.flatMap((physicalObject: JValue) =>
Extractors.getAllBlankNodeContent(graph)(physicalObject.obj)("hasCarrierType"))
.flatMap(objectWithCarrierType => stringValue(objectWithCarrierType.obj)("name"))
.toList
val dcTermsCreated
: JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
graph =>
......@@ -299,6 +378,44 @@ object Extractors {
).toList
/**
 * Returns a single date-like value for the given record property (called with "issued"
 * and "temporal" in this project).
 *
 * The entries returned by getAllBlankNodeContent for `property` are scanned in order; the
 * first entry containing one of the keys below supplies the result. Within that entry the
 * keys are tried in this order: normalizedDateValue, expressedDate, certainty, dateQualifier.
 * The result is None if no entry contains any of these keys.
 */
val dcTermsIssuedTemporal
: JArr => mutable.LinkedHashMap[String, JValue] => String => Option[String] =
graph =>
record =>
property =>
getAllBlankNodeContent(graph)(record)(property)
.collectFirst {
case nV if nV.contains("normalizedDateValue") => nV("normalizedDateValue").str
case eD if eD.contains("expressedDate") => eD("expressedDate").str
case certainty if certainty.contains("certainty") => certainty("certainty").str
case dQ if dQ.contains("dateQualifier") => dQ("dateQualifier").str
}
/**
 * Collects the names of all places referenced by the record's "spatial" property.
 *
 * Only entries whose "@type" is the RiC-O Place type and which have a "name" are kept;
 * all other entries are ignored.
 */
val dcTermsSpatial
: JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
graph =>
record =>
getAllBlankNodeContent(graph)(record)("spatial")
.collect {
case placeObject
if placeObject.contains("@type") &&
placeObject("@type").str == "https://www.ica.org/standards/RiC/ontology#Place" &&
placeObject.contains("name") => placeObject("name").str
}.toList
/**
 * Returns the name of the first entry under the record's "temporal" property whose "@type"
 * is the RiC-O Place type and which has a "name"; the result has at most one element.
 *
 * NOTE(review): the Place type filter matches the one in dcTermsSpatial and looks copied
 * from there - confirm that temporal entries are really typed as Place. createChoObject
 * fills dcterms:temporal via dcTermsIssuedTemporal(...)("temporal"), not via this value.
 */
val dcTermsTemporal
: JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
graph =>
record =>
getAllBlankNodeContent(graph)(record)("temporal")
.collectFirst {
case placeObject
if placeObject.contains("@type") &&
placeObject("@type").str == "https://www.ica.org/standards/RiC/ontology#Place" &&
placeObject.contains("name") => placeObject("name").str
}.toList
val resourceTitles
......
......@@ -36,8 +36,8 @@ object ModelXMLTransformer {
model: Model,
id: String,
recordset: String,
institution: String,
recordset: List[String],
institution: List[String],
published: Boolean = true,
format: String = "EDM",
): String = {
......
......@@ -77,7 +77,20 @@ class ProvidedCHO (val id: String) {
/** Adds a dcterms:created literal for this CHO to the model; does nothing for None. */
def addDcTermsCreated(dctermsCreated: Option[String]): Unit =
  // foreach, not map: the call is made purely for its side effect on the model
  dctermsCreated.foreach(t => model.add(iri(id), DCTERMS.CREATED, factory.createLiteral(t)))
/** Adds a dcterms:extent literal for this CHO to the model; does nothing for None. */
def addDcTermsExtent(dctermsExtent: Option[String]): Unit =
  // foreach, not map: the call is made purely for its side effect on the model
  dctermsExtent.foreach(t => model.add(iri(id), DCTERMS.EXTENT, factory.createLiteral(t)))
/** Adds a dcterms:issued literal for this CHO to the model; does nothing for None. */
def addDcTermsIssued(dctermsIssued: Option[String]): Unit =
  // foreach, not map: the call is made purely for its side effect on the model
  dctermsIssued.foreach(t => model.add(iri(id), DCTERMS.ISSUED, factory.createLiteral(t)))
/** Adds a dcterms:medium literal for this CHO to the model; does nothing for None. */
def addDcTermsMedium(medium: Option[String]): Unit =
  // foreach, not map: the call is made purely for its side effect on the model
  medium.foreach(t => model.add(iri(id), DCTERMS.MEDIUM, factory.createLiteral(t)))
/** Adds a dcterms:spatial literal for this CHO to the model; does nothing for None. */
def addDcTermsSpatial(spatial: Option[String]): Unit =
  // foreach, not map: the call is made purely for its side effect on the model
  spatial.foreach(t => model.add(iri(id), DCTERMS.SPATIAL, factory.createLiteral(t)))
/** Adds a dcterms:temporal literal for this CHO to the model; does nothing for None. */
def addDcTermsTemporal(temporal: Option[String]): Unit =
  // foreach, not map: the call is made purely for its side effect on the model
  temporal.foreach(t => model.add(iri(id), DCTERMS.TEMPORAL, factory.createLiteral(t)))
def getModel: Model = model
......
......@@ -786,7 +786,10 @@
"_:b71"
],
"hasTitle": "_:b36",
"heldBy": "https://memobase.ch/institution/rts",
"heldBy": ["https://memobase.ch/institution/rts",
"https://memobase.ch/institution/nurEinTest",
"_:b76",
"blanknode und dies werden nicht berücksichtigt"],
"identifiedBy": [
"_:b67",
"_:b84",
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -26,6 +26,8 @@ import org.eclipse.rdf4j.rio.Rio
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import scala.util.{Failure, Success}
class CHOSpec extends AnyFunSuite with Matchers{
private lazy val rawRicoIndent = loadFile("src/test/resources/rawIndent.json")
......@@ -38,6 +40,13 @@ class CHOSpec extends AnyFunSuite with Matchers{
private lazy val ricoNoSourceInRecord = loadFile("src/test/resources/raw2.indent.json")
private lazy val ricoCertainty = loadFile("src/test/resources/rico.certainty.json")
private lazy val ricoDuration = loadFile("src/test/resources/rico.duration.json")
private lazy val ricoNoPhysicalObjectAvailable = loadFile("src/test/resources/rico.NoPhysicalObject.json")
private val ricoExpressedDateForIssued = loadFile("src/test/resources/ricoFakedExpressedDateForIssued.json")
private val ricoNoCorrectIssuedProperty = loadFile("src/test/resources/rico.durationNoCorrectIssuedDate.json")
test("get creator") {
......@@ -203,5 +212,110 @@ class CHOSpec extends AnyFunSuite with Matchers{
}
test ("collect dcterms:extent") {
  // a physical object carrying a duration yields exactly that duration
  val durationGraph = Extractors.jsonGraph(ricoDuration).get.arr
  val objectsWithDuration = Extractors.physicalObject(durationGraph).get
  Extractors.resourceDCTermsExtent(objectsWithDuration) shouldBe List("00:30:00")

  // physical objects exist, but none of them has a duration
  val certaintyGraph = Extractors.jsonGraph(ricoCertainty).get.arr
  val objectsWithoutDuration = Extractors.physicalObject(certaintyGraph).get
  Extractors.resourceDCTermsExtent(objectsWithoutDuration) shouldBe empty

  // no physical objects at all (no such object found in the dataset so far -
  // the test graph was edited by GH)
  val emptyGraph = Extractors.jsonGraph(ricoNoPhysicalObjectAvailable).get.arr
  val noPhysicalObjects = Extractors.physicalObject(emptyGraph).get
  noPhysicalObjects shouldBe empty
  Extractors.resourceDCTermsExtent(noPhysicalObjects) shouldBe empty
}
test ("collect dcterms:issued") {
  // normalized date value is picked up
  val normalizedGraph = Extractors.jsonGraph(ricoDuration).get.arr
  val normalizedRecord = Extractors.record(normalizedGraph).get
  Extractors.dcTermsIssuedTemporal(normalizedGraph)(normalizedRecord)("issued") shouldBe
    Some("2012-12-07")

  // expressed date is used when it is what the node offers
  val expressedGraph = Extractors.jsonGraph(ricoExpressedDateForIssued).get.arr
  val expressedRecord = Extractors.record(expressedGraph).get
  Extractors.dcTermsIssuedTemporal(expressedGraph)(expressedRecord)("issued") shouldBe
    Some("1953-12-07-1953-12-07")

  // record without an issued date
  val certaintyGraph = Extractors.jsonGraph(ricoCertainty).get.arr
  val certaintyRecord = Extractors.record(certaintyGraph).get
  Extractors.dcTermsIssuedTemporal(certaintyGraph)(certaintyRecord)("issued") shouldBe None

  // the partial function passed to collectFirst does not match although a blank node
  // exists for the issued property (ricoNoCorrectIssuedProperty)
  val mismatchGraph = Extractors.jsonGraph(ricoNoCorrectIssuedProperty).get.arr
  val mismatchRecord = Extractors.record(mismatchGraph).get
  Extractors.dcTermsIssuedTemporal(mismatchGraph)(mismatchRecord)("issued") shouldBe None
}
test ("collect dcterms:medium") {
  val rawGraph = Extractors.jsonGraph(rawRicoIndent).get.arr
  // for how this extractor works, see the detailed comment at its definition
  val carrierNames = Extractors.resourceDCTermsMedium(rawGraph)
  carrierNames.headOption shouldBe Some("Nitrato")
}
test ("collect dcterms:spatial") {
  val sourceGraph = Extractors.jsonGraph(ricoNoSourceInRecord).get.arr
  val sourceRecord = Extractors.record(sourceGraph).get
  // for how this extractor works, see the detailed comment at its definition
  val placeNames = Extractors.dcTermsSpatial(sourceGraph)(sourceRecord)
  placeNames shouldBe List("Madrid", "Spagna")
}
test ("collect dcterms:temporal") {
  val contributorGraph = Extractors.jsonGraph(ricoContributorP60441).get.arr
  val contributorRecord = Extractors.record(contributorGraph).get
  val temporalValue: Option[String] =
    Extractors.dcTermsIssuedTemporal(contributorGraph)(contributorRecord)("temporal")
  temporalValue shouldBe Some("1949-07-22-1949-07-22; 1944")
}
test ("get institution from graph") {
  val contributorGraph = Extractors.jsonGraph(ricoContributorP60441).get.arr
  // the extractor takes the Try itself, so the record is not unwrapped here
  val recordLookup = Extractors.record(contributorGraph)

  // institutions: only the http identifiers under heldBy are kept
  val institutionUris = Extractors.recordSetOrInstitution(recordLookup)("heldBy")
  institutionUris shouldBe List(
    "https://memobase.ch/institution/rts",
    "https://memobase.ch/institution/nurEinTest"
  )
  val institutionShortNames = institutionUris.map(uri => EDM.getInstitutionOrRecordsetIdent(uri))
  institutionShortNames shouldBe List("rts", "nurEinTest")

  // record sets: taken from isPartOf
  val recordSetUris = Extractors.recordSetOrInstitution(recordLookup)("isPartOf")
  recordSetUris shouldBe List("https://memobase.ch/recordSet/rts-002")
  val recordSetShortNames = recordSetUris.map(uri => EDM.getInstitutionOrRecordsetIdent(uri))
  recordSetShortNames shouldBe List("rts-002")
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment