Commit b70c3c0e authored by Günter Hipler
Browse files

continuing after quite some pause

parent 5e9235ac
Pipeline #22024 failed with stages
in 5 minutes
......@@ -32,6 +32,10 @@ import scala.util.Try
class KafkaTopology extends Logging {
import KafkaTopologyUtils._
//import org.apache.kafka.streams.scala.serialization.Serdes
//changed in Kafka client 2.7.0
//but with the above we do not have the correct implicits for builder streams
//evaluation necessary - not now...
import Serdes._
def build(
......@@ -46,10 +50,9 @@ class KafkaTopology extends Logging {
//val Array(noDigitalObject, noLocator, noPhoto, isPhoto) = source
//we have to discuss, which documents should be delivered to Europeana
val Array(noDigitalObject, noLocator, isEDMDeliverable) = source
val Array(do_we_have_any_Prerequisites, isEDMDeliverable) = source
.branch(
(_, v) => hasNoDigitalObject(v),
(_, v) => hasNoLocator(v),
(_, v) => checkPrerequisites(v),
(_, _) => true
)
......
......@@ -28,27 +28,8 @@ object KafkaTopologyUtils {
/** Extracts the identifier from a URI: the last `/`-separated segment with its
  * final extension (the text after the last `.`) removed.
  *
  * A dot that is the very last character of the segment is not treated as an
  * extension separator, because the regex requires at least one non-dot
  * character after it.
  *
  * @param uri URI or path, e.g. `https://example.org/record/abc-001.json`
  * @return the last path segment without its extension; the empty string when
  *         `uri` contains no segment at all (e.g. `"/"`)
  */
def extractId(uri: String): String =
  // `split` drops trailing empty strings, so a slash-only input yields an empty
  // array; `lastOption` avoids the NoSuchElementException that `.last` would throw.
  uri
    .split("/")
    .lastOption
    .fold("")(_.split("\\.(?=[^.]+$)")(0))
/** Tells whether a message does NOT describe a photograph.
  *
  * The message is parsed with `Extractors.jsonGraph` and the record is looked up
  * with `Extractors.record`. If either step fails the answer is `true`; otherwise
  * the answer is `true` exactly when the value returned by `Extractors.ricoType`
  * for that record does not contain `"Foto"`.
  *
  * @param msgVal raw message value handed to `Extractors.jsonGraph`
  * @return `true` when no record could be extracted or its type is not `"Foto"`
  */
def isNotPhotography(msgVal: String): Boolean = {
  val parsedRecord =
    Extractors.jsonGraph(msgVal).flatMap(g => Extractors.record(g.arr))
  // Short-circuit: `.get` is only reached when the extraction succeeded.
  parsedRecord.isFailure || !Extractors.ricoType(parsedRecord.get).contains("Foto")
}
/** Tells whether no digital object can be extracted from a message.
  *
  * @param msgVal raw message value handed to `Extractors.jsonGraph`
  * @return `true` when parsing the graph or `Extractors.digitalObject` fails
  */
def hasNoDigitalObject(msgVal: String): Boolean = {
  val digitalObject =
    Extractors.jsonGraph(msgVal).flatMap(g => Extractors.digitalObject(g.arr))
  digitalObject.isFailure
}
/** Tells whether no image resource id can be obtained for a message's digital object.
  *
  * The result is `true` when any of these steps fails: parsing the graph,
  * extracting the digital object, or calling `.get` on the value returned by
  * `Extractors.imageResourceId` (wrapped in `Try`, so a throwing `.get` counts
  * as a failure).
  *
  * @param msgVal raw message value handed to `Extractors.jsonGraph`
  * @return `true` when the chain above does not produce a value
  */
def hasNoLocator(msgVal: String): Boolean = {
  val locator = for {
    graph         <- Extractors.jsonGraph(msgVal)
    digitalObject <- Extractors.digitalObject(graph.arr)
    resourceId    <- Try(Extractors.imageResourceId(digitalObject).get)
  } yield resourceId
  locator.isFailure
}
/** Prerequisite check that currently ignores its input and always answers `false`.
  *
  * @param msgVal raw message value; not inspected
  * @return always `false`
  */
def checkPrerequisites(msgVal: String): Boolean = {
  // No rule is evaluated here; the result is constant.
  false
}
}
......@@ -23,7 +23,7 @@ package ch.memobase.edm
import ch.memobase.rdf.Helper
import ch.memobase.rdf.vocabularies.EDM
import org.eclipse.rdf4j.model.Model
import org.eclipse.rdf4j.model.{IRI, Model, Resource}
import org.eclipse.rdf4j.model.impl.{DynamicModelFactory, SimpleValueFactory}
import org.eclipse.rdf4j.model.vocabulary.{DC, DCTERMS, RDF}
......@@ -64,8 +64,12 @@ class EDM {
/** Builds the ProvidedCHO for a record and wraps it in an ExtractionResult.
  *
  * The CHO receives the record's title, up to three descriptions and a
  * creation date, each taken from the corresponding `Extractors` call.
  *
  * @param graph         JSON graph, passed to `Extractors.creationDate`
  * @param record        record object that id, title and descriptions are read from
  * @param digitalObject not referenced in the lines visible here
  * @return the populated CHO inside an `ExtractionResult` with the default warnings buffer
  */
private def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
// NOTE(review): this hunk shows removed and added diff lines without +/- markers.
// The no-arg constructor followed by `addId` and the one-arg constructor look like
// the old and new form of the same step - confirm against the repository.
val cho = new ProvidedCHO
// `.get` throws when `Extractors.recordId` yields no value.
cho.addId(Extractors.recordId(record).get)
val cho = new ProvidedCHO(Extractors.recordId(record).get)
cho.addTitel(Extractors.title(record))
// Abstract, scope-and-content and descriptive note are all added through addDescription.
cho.addDescription(Extractors.dctAbstract(record))
cho.addDescription(Extractors.scopeAndContent(record))
cho.addDescription(Extractors.descriptiveNote(record))
cho.addCreationDate(Extractors.creationDate(graph)(record.obj))
ExtractionResult(cho)
}
......@@ -76,26 +80,26 @@ class EDM {
/** Pairs an extracted value with a buffer of warning strings.
  *
  * @param obj      the extracted value
  * @param warnings mutable buffer of warning messages; a fresh empty buffer is
  *                 created whenever the argument is omitted
  * @tparam T type of the extracted value
  */
case class ExtractionResult[T](
  obj: T,
  warnings: mutable.Buffer[String] = mutable.Buffer.empty[String]
)
class ProvidedCHO {
class ProvidedCHO (val id: String) {
import org.eclipse.rdf4j.model.util.Values.iri
import org.eclipse.rdf4j.model.util.Values.literal
private var _id:Option[String] = None
//import org.eclipse.rdf4j.model.util.Values.literal
private val model = Helper.getModelWithEDMNamespaces
model.add(iri(id),RDF.TYPE, EDM.ProvidedCHO)
private val factory = SimpleValueFactory.getInstance()
private lazy val factory = SimpleValueFactory.getInstance()
/** Adds a `DC.DESCRIPTION` literal for this CHO when a description is present.
  *
  * @param desc optional description text; `None` leaves the model untouched
  */
def addDescription(desc: Option[String]): Unit =
  // foreach rather than map: the call is made only for its effect on the model.
  desc.foreach(t => model.add(iri(id), DC.DESCRIPTION, factory.createLiteral(t)))
def addId(id: String): Unit = {
_id = Some(id)
model.add(iri(id),RDF.TYPE, EDM.ProvidedCHO)
/** Adds a `DC.TITLE` literal for this CHO when a title is present.
  *
  * @param title optional title text; `None` leaves the model untouched
  */
def addTitel(title: Option[String]): Unit = {
  // foreach rather than map: the call is made only for its effect on the model.
  title.foreach(t => model.add(iri(id), DC.TITLE, factory.createLiteral(t)))
}
def getModel: Model = model
//only for test purposes by now
//model.add(iri(_id),DCTERMS.ISSUED,factory.createLiteral("1993"))
//model.add(iri(_id),DC.CONTRIBUTOR, factory.createLiteral("Günter Hipler"))
/** Adds a `DC.DATE` literal for this CHO when a creation date is present.
  *
  * @param createDate optional date text, stored as given; `None` leaves the model untouched
  */
def addCreationDate(createDate: Option[String]): Unit =
  // foreach rather than map: the call is made only for its effect on the model.
  createDate.foreach(t => model.add(iri(id), DC.DATE, factory.createLiteral(t)))
def getModel: Model = model
}
......
{"@graphThisIsWrong":[{"@id":"_:b0","@type":"http://www.w3.org/2004/02/skos/core#Concept","prefLabel":"Minusio edif"},{"@id":"_:b1","@type":"https://www.ica.org/standards/RiC/ontology#Rule","name":"Archivio di Stato del Cantone Ticino","regulates":"https://memobase.ch/record/ati-001-L_44_13","type":"holder"},{"@id":"_:b10","@type":"https://www.ica.org/standards/RiC/ontology#DateRange","normalizedDateValue":"1930/1950"},{"@id":"_:b11","@type":"http://www.w3.org/2004/02/skos/core#Concept","prefLabel":"Locarnese edif"},{"@id":"_:b12","@type":"https://www.ica.org/standards/RiC/ontology#Language","name":"Italian","type":"content"},{"@id":"_:b13","@type":"https://www.ica.org/standards/RiC/ontology#Mechanism","name":"LanguagesNormalizer","performs":"_:b14"},{"@id":"_:b14","@type":"https://www.ica.org/standards/RiC/ontology#Activity","affects":"_:b12","beginningDate":"2021-45-26T09:45:39+0000","endDate":"2021-45-26T09:45:39+0000","performedBy":"_:b13","resultsIn":"_:b5","type":"enrichment"},{"@id":"_:b15","@type":"https://www.ica.org/standards/RiC/ontology#Rule","name":"public","regulates":"https://memobase.ch/digital/ati-001-L_44_13-1","type":"access"},{"@id":"_:b16","@type":"https://www.ica.org/standards/RiC/ontology#Identifier","identifier":"ati-001-L_44_13-1","type":"main"},{"@id":"_:b17","@type":"https://www.ica.org/standards/RiC/ontology#Person","firstName":"Valentino","lastName":"Monotti","agentIsTargetOfCreationRelation":"_:b8","name":"Valentino 
Monotti"},{"@id":"_:b18","@type":"https://www.ica.org/standards/RiC/ontology#Rule","name":"onsite","regulates":"https://memobase.ch/physical/ati-001-L_44_13-1","type":"access"},{"@id":"_:b19","@type":"https://www.ica.org/standards/RiC/ontology#CarrierType","name":"Nitrato"},{"@id":"_:b2","@type":"https://www.ica.org/standards/RiC/ontology#Identifier","identifier":"L/44.13","type":"original"},{"@id":"_:b20","@type":"https://www.ica.org/standards/RiC/ontology#Identifier","identifier":"L/44.13","type":"callNumber"},{"@id":"_:b21","@type":"https://www.ica.org/standards/RiC/ontology#Rule","sameAs":"http://rightsstatements.org/vocab/CNE/1.0/","name":"Copyright Not Evaluated (CNE)","regulates":"https://memobase.ch/physical/ati-001-L_44_13-1","type":"usage"},{"@id":"_:b22","@type":"https://www.ica.org/standards/RiC/ontology#Identifier","identifier":"ati-001-L_44_13-1","type":"main"},{"@id":"_:b23","@type":"https://www.ica.org/standards/RiC/ontology#Rule","sameAs":"http://rightsstatements.org/vocab/CNE/1.0/","name":"Copyright Not Evaluated (CNE)","regulates":"https://memobase.ch/digital/ati-001-L_44_13-1","type":"usage"},{"@id":"_:b3","@type":"https://www.ica.org/standards/RiC/ontology#Place","name":"Minusio"},{"@id":"_:b4","@type":"http://www.w3.org/2004/02/skos/core#Concept","prefLabel":"Edifici, piazze, strade, 
monumenti"},{"@id":"_:b5","@type":"https://www.ica.org/standards/RiC/ontology#Language","sameAs":"http://www.wikidata.org/entity/Q652","name":[{"@language":"de","@value":"Italienisch"},{"@language":"fr","@value":"italien"},{"@language":"it","@value":"italiano"}],"resultsFrom":"_:b14","type":"content"},{"@id":"_:b6","@type":"https://www.ica.org/standards/RiC/ontology#Identifier","identifier":"ASTi-L_44_13","type":"oldMemobase"},{"@id":"_:b7","@type":"https://www.ica.org/standards/RiC/ontology#Identifier","identifier":"ati-001-L_44_13","type":"main"},{"@id":"_:b8","@type":"https://www.ica.org/standards/RiC/ontology#CreationRelation","creationRelationHasSource":"https://memobase.ch/record/ati-001-L_44_13","creationRelationHasTarget":"_:b17","type":"creator"},{"@id":"_:b9","@type":"https://www.ica.org/standards/RiC/ontology#Title","title":"Hotel Esplanade","type":"main"},{"@id":"https://memobase.ch/digital/ati-001-L_44_13-1","@type":"https://www.ica.org/standards/RiC/ontology#Instantiation","identifiedBy":"_:b22","instantiates":"https://memobase.ch/record/ati-001-L_44_13","isDerivedFromInstantiation":"https://memobase.ch/physical/ati-001-L_44_13-1","regulatedBy":["_:b15","_:b23"],"type":"digitalObject"},{"@id":"https://memobase.ch/physical/ati-001-L_44_13-1","@type":"https://www.ica.org/standards/RiC/ontology#Instantiation","P60558":"B/N","eventType":"CREATE","hasCarrierType":"_:b19","hasDerivedInstantiation":"https://memobase.ch/digital/ati-001-L_44_13-1","identifiedBy":["_:b20","_:b16"],"instantiates":"https://memobase.ch/record/ati-001-L_44_13","physicalCharacteristics":["Bemerkung: Pellicola","Verfahren: Negativo","Höhe: 13 cm","Weite: 18 cm"],"regulatedBy":["_:b18","_:b21"],"type":"physicalObject"},{"@id":"https://memobase.ch/record/ati-001-L_44_13","@type":"https://www.ica.org/standards/RiC/ontology#Record","abstract":"Veduta 
dell'albergo","created":"_:b10","spatial":"_:b3","P60451":"https://memobase.ch/institution/mrv","sameAs":"http://www3.ti.ch/DECS/sw/struttura/decs/dcsu/ac/asti/cff/index.php?p=immagine&id_immagine=17666","eventType":"CREATE","isPublished":false,"conditionsOfUse":"Si veda il regolamento per la consultazione e la riproduzione di immagini fotografiche","hasInstantiation":["https://memobase.ch/digital/ati-001-L_44_13-1","https://memobase.ch/physical/ati-001-L_44_13-1"],"hasLanguage":["_:b5","_:b12"],"hasSubject":["_:b4","_:b0","_:b11"],"hasTitle":"_:b9","heldBy":"https://memobase.ch/institution/ati","identifiedBy":["_:b2","_:b6","_:b7"],"isPartOf":"https://memobase.ch/recordSet/ati-001","recordResourceOrInstantiationIsSourceOfCreationRelation":"_:b8","regulatedBy":"_:b1","scopeAndContent":"Discreto","source":"Monotti_L_44.13.jpg","title":"Hotel Esplanade","type":"Foto"}],"@context":{"prefLabel":{"@id":"http://www.w3.org/2004/02/skos/core#prefLabel"},"regulatedBy":{"@id":"https://www.ica.org/standards/RiC/ontology#regulatedBy","@type":"@id"},"identifiedBy":{"@id":"https://www.ica.org/standards/RiC/ontology#identifiedBy","@type":"@id"},"source":{"@id":"https://www.ica.org/standards/RiC/ontology#source"},"spatial":{"@id":"http://purl.org/dc/terms/spatial","@type":"@id"},"scopeAndContent":{"@id":"https://www.ica.org/standards/RiC/ontology#scopeAndContent"},"abstract":{"@id":"http://purl.org/dc/terms/abstract"},"heldBy":{"@id":"https://www.ica.org/standards/RiC/ontology#heldBy","@type":"@id"},"hasSubject":{"@id":"https://www.ica.org/standards/RiC/ontology#hasSubject","@type":"@id"},"hasLanguage":{"@id":"https://www.ica.org/standards/RiC/ontology#hasLanguage","@type":"@id"},"isPublished":{"@id":"https://memobase.ch/internal/isPublished","@type":"http://www.w3.org/2001/XMLSchema#boolean"},"hasInstantiation":{"@id":"https://www.ica.org/standards/RiC/ontology#hasInstantiation","@type":"@id"},"recordResourceOrInstantiationIsSourceOfCreationRelation":{"@id":"https://www.ica.org/st
andards/RiC/ontology#recordResourceOrInstantiationIsSourceOfCreationRelation","@type":"@id"},"hasTitle":{"@id":"https://www.ica.org/standards/RiC/ontology#hasTitle","@type":"@id"},"P60451":{"@id":"http://rdaregistry.info/Elements/u/P60451","@type":"@id"},"type":{"@id":"https://www.ica.org/standards/RiC/ontology#type"},"title":{"@id":"https://www.ica.org/standards/RiC/ontology#title"},"created":{"@id":"http://purl.org/dc/terms/created","@type":"@id"},"conditionsOfUse":{"@id":"https://www.ica.org/standards/RiC/ontology#conditionsOfUse"},"isPartOf":{"@id":"https://www.ica.org/standards/RiC/ontology#isPartOf","@type":"@id"},"eventType":{"@id":"https://memobase.ch/internal/eventType"},"sameAs":{"@id":"http://schema.org/sameAs"},"name":{"@id":"https://www.ica.org/standards/RiC/ontology#name"},"performs":{"@id":"https://www.ica.org/standards/RiC/ontology#performs","@type":"@id"},"beginningDate":{"@id":"https://www.ica.org/standards/RiC/ontology#beginningDate","@type":"http://www.w3.org/2001/XMLSchema#dateTime"},"endDate":{"@id":"https://www.ica.org/standards/RiC/ontology#endDate","@type":"http://www.w3.org/2001/XMLSchema#dateTime"},"performedBy":{"@id":"https://www.ica.org/standards/RiC/ontology#performedBy","@type":"@id"},"affects":{"@id":"https://www.ica.org/standards/RiC/ontology#affects","@type":"@id"},"resultsIn":{"@id":"https://www.ica.org/standards/RiC/ontology#resultsIn","@type":"@id"},"regulates":{"@id":"https://www.ica.org/standards/RiC/ontology#regulates","@type":"@id"},"identifier":{"@id":"https://www.ica.org/standards/RiC/ontology#identifier"},"creationRelationHasSource":{"@id":"https://www.ica.org/standards/RiC/ontology#creationRelationHasSource","@type":"@id"},"creationRelationHasTarget":{"@id":"https://www.ica.org/standards/RiC/ontology#creationRelationHasTarget","@type":"@id"},"physicalCharacteristics":{"@id":"https://www.ica.org/standards/RiC/ontology#physicalCharacteristics"},"hasCarrierType":{"@id":"https://www.ica.org/standards/RiC/ontology#hasCarrierT
ype","@type":"@id"},"P60558":{"@id":"http://rdaregistry.info/Elements/u/P60558"},"instantiates":{"@id":"https://www.ica.org/standards/RiC/ontology#instantiates","@type":"@id"},"hasDerivedInstantiation":{"@id":"https://www.ica.org/standards/RiC/ontology#hasDerivedInstantiation","@type":"@id"},"resultsFrom":{"@id":"https://www.ica.org/standards/RiC/ontology#resultsFrom","@type":"@id"},"isDerivedFromInstantiation":{"@id":"https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation","@type":"@id"},"normalizedDateValue":{"@id":"https://www.ica.org/standards/RiC/ontology#normalizedDateValue"},"agentIsTargetOfCreationRelation":{"@id":"https://www.ica.org/standards/RiC/ontology#agentIsTargetOfCreationRelation","@type":"@id"},"firstName":{"@id":"http://xmlns.com/foaf/0.1/firstName"},"lastName":{"@id":"http://xmlns.com/foaf/0.1/lastName"}}}
\ No newline at end of file
......@@ -25,6 +25,9 @@ import Utils._
import KafkaTopologyUtils._
class KafkaTopologyUtilsSpec extends AnyFunSuite with Matchers {
/*
test("Record with attached photo should pass preliminary tests") {
val imageRecord = loadFile("src/test/resources/image_input_1.json")
assert(!hasNoDigitalObject(imageRecord))
......@@ -45,4 +48,6 @@ class KafkaTopologyUtilsSpec extends AnyFunSuite with Matchers {
assert(hasNoLocator(imageRecord))
assert(!isNotPhotography(imageRecord))
}
*/
}
......@@ -24,21 +24,60 @@ import ch.memobase.Utils.loadFile
import ch.memobase.rdf.writer.RdfXmlWriter
import org.eclipse.rdf4j.rio.{RDFFormat, Rio}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
class ExtractorSpec extends AnyFunSuite{
val rawRico: String = {
loadFile("src/test/resources/raw1L.json")
//Extractors.jsonGraph(content)
//content
class ExtractorSpec extends AnyFunSuite with Matchers{
private lazy val rawRico = loadFile("src/test/resources/raw1L.json")
private lazy val rawErroneousFile = loadFile("src/test/resources/raw1LError.json")
private lazy val rawRicoIndent = loadFile("src/test/resources/rawIndent.json")
test ("read graph with Extractor") {
  // The raw1L.json fixture is expected to parse into a graph.
  val parsed = Extractors.jsonGraph(rawRico)
  parsed.isSuccess shouldBe true
}
test ("read not valid graph which should throw an Error") {
  // The raw1LError.json fixture is expected to be rejected by the graph extractor.
  val parsed = Extractors.jsonGraph(rawErroneousFile)
  parsed.isFailure shouldBe true
}
test ("read RecordId from file with indentations") {
  // Parse the indented fixture, locate the record node, then compare its id.
  val graphNodes = Extractors.jsonGraph(rawRicoIndent).get.arr
  val recordObj = Extractors.record(graphNodes).get.obj
  Extractors.recordId(recordObj).get shouldBe "https://memobase.ch/record/ati-001-L_44_13"
}
// Builds a ProvidedCHO from the indented fixture and writes its model to stdout.
// The test contains no assertion; it only fails if one of the calls throws.
test ("create cultural heritage object (CHO") {
val graph = Extractors.jsonGraph(rawRicoIndent).get.arr
val record = Extractors.record(graph).get.obj
val cho = new ProvidedCHO(Extractors.recordId(record).get)
cho.addTitel(Extractors.title(record))
// Abstract, scope-and-content and descriptive note are all added as descriptions.
cho.addDescription(Extractors.dctAbstract(record))
cho.addDescription(Extractors.scopeAndContent(record))
cho.addDescription(Extractors.descriptiveNote(record))
/*
todo: we could use the more precise dcterms:temporal, dcterms:created and dcterms:issued
Sebastian does not use temporal and issued in IIIF
I have not seen these in the sample record either - do they exist?
*/
cho.addCreationDate(Extractors.creationDate(graph)(record))
//creationDate
// Serialise through the project's RdfXmlWriter, targeting System.out.
val rdfWriter = new RdfXmlWriter(System.out)
Rio.write(cho.getModel,rdfWriter)
}
test ("simple test") {
val edm = new EDM()
val rdfModel = new EDMRdf4jModel
//val s = t.get.obj._1
//val resource = rdfModel.createResource("#UEDIN:214")
println(rdfModel.serializeModel(new ProvidedCHO))
//println(rdfModel.serializeModel(new ProvidedCHO))
}
......@@ -49,9 +88,8 @@ class ExtractorSpec extends AnyFunSuite{
val record = Extractors.record(graph).get
val imageResourceId = Extractors.imageResourceId(digitalObject)
val recId = Extractors.recordId(record)
val cho = new ProvidedCHO
val cho = new ProvidedCHO(Extractors.recordId(record).get)
cho.addId(Extractors.recordId(record).get)
//use our own writer so we are able to create special EDM output requirements
val rdfWriter = new RdfXmlWriter(System.out)
Rio.write(cho.getModel,rdfWriter)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment