Commit 12d7ab0f authored by Günter Hipler's avatar Günter Hipler
Browse files

first EDM document we can use to talk about

parent cb716f4a
Pipeline #22995 passed with stages
in 6 minutes and 31 seconds
......@@ -58,7 +58,15 @@ und mit Daniel/Silvia abstimmen, wie diese Zusammenhänge nach Europeana übertr
- für Silvia sind auch noch Punkte offen
- jetzt nutze ich isHeldBy und isPartof für institution und recordset
- im Moment nutze ich die Informationen von heldBy und isPartOf für recordset und institution Einträge im ES Dokument
Diese Informationen können dann für Sets in der OAI-Schnittstelle verwendet werden.
#### owl:sameAs (CHO object)
- Welcher Link soll verwendet werden?
#### skos:prefLabel
Ist es korrekt, dafür den Extractor `Extractors.dcTermsSpatial(graph)(record.value)` zu verwenden?
## allgemeine Fragen
......
......@@ -21,7 +21,7 @@
package ch.memobase.edm
import ch.memobase.edm.subjects.{ModelXMLTransformer, ProvidedCHO, WebResource}
import ch.memobase.edm.subjects.{Aggregation, Concept, ModelXMLTransformer, Place, ProvidedCHO, TimeSpan, WebResource}
import java.time.format.DateTimeFormatter
import scala.collection.mutable
......@@ -29,7 +29,6 @@ import scala.collection.mutable.ArrayBuffer
import scala.util.Try
import ujson.{Arr => JArr, Obj => JObj}
import scala.jdk.CollectionConverters._
class EDM {
......@@ -51,9 +50,19 @@ class EDM {
val choExtraction = createChoObject(graph,record.get,digitalObject)
val webExtraction = createWebResources(graph,record.get,digitalObject)
//actually only one instance - correct?
val placeExtraction: ExtractionResult[Place] = createPlace(graph,record.get,digitalObject)
val aggregationExtraction: ExtractionResult[Aggregation] = createAggregation(graph,record.get,digitalObject)
val conceptExtraction: ExtractionResult[Concept] = createConcept(graph,record.get,digitalObject)
val timespanExtraction: ExtractionResult[TimeSpan] = createTimeSpan(graph,record.get,digitalObject)
webExtraction.obj.foreach(webResource =>
choExtraction.obj.getModel.addAll(webResource.getModel)
)
choExtraction.obj.getModel.addAll(placeExtraction.obj.getModel)
//choExtraction.obj.getModel.addAll(aggregationExtraction.obj.getModel)
//choExtraction.obj.getModel.addAll(conceptExtraction.obj.getModel)
//choExtraction.obj.getModel.addAll(timespanExtraction.obj.getModel)
val esObject = ModelXMLTransformer(model = choExtraction.obj.getModel,
id = recordId,
......@@ -63,15 +72,12 @@ class EDM {
.map( identInstitution => EDM.getInstitutionOrRecordsetIdent(identInstitution))
)
val result = ExtractionResult((
//todo: by now we do not collect any infos and warnings
ExtractionResult((
recordId,
esObject
)
, new ArrayBuffer[String]())
result
,new ArrayBuffer[String]())
}
}
......@@ -159,7 +165,8 @@ class EDM {
Extractors.dcTermsIssuedTemporal(graph)(record.value)("temporal")
.foreach(c => cho.addDcTermsTemporal(Some(c)))
Extractors.edmType(record.value)
.foreach(c => cho.addEdmType(Some(c)))
ExtractionResult(cho)
......@@ -173,13 +180,65 @@ class EDM {
ExtractionResult(webresource.getOrElse(List()) )
}
/**
 * Builds the [[Place]] for a record.
 *
 * The place is identified by the record id; every value delivered by
 * `Extractors.dcTermsSpatial` is attached to it as a `skos:prefLabel`.
 *
 * TODO (open question): how many place instances can a record have?
 * At the moment exactly one is created.
 *
 * @param graph         JSON graph the spatial values are looked up in
 * @param record        record object providing the id and the values
 * @param digitalObject currently not used by this method
 * @return the populated place wrapped in an [[ExtractionResult]];
 *         the `.get` below fails if no record id can be extracted
 */
def createPlace(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Place] = {
  val placeId = Extractors.recordId(record).get
  val place = new Place(placeId)

  for (spatialLabel <- Extractors.dcTermsSpatial(graph)(record.value)) {
    place.addSkosPrefLabel(Some(spatialLabel))
  }

  ExtractionResult(place)
}
/**
 * Builds a placeholder [[Concept]] for a record.
 *
 * The concept is identified by the record id and receives a fixed test
 * label; nothing is read from `graph` or `digitalObject` yet.
 *
 * TODO (open questions): how many concept instances are available per
 * record, and is the record id the correct identifier for a concept?
 *
 * @param graph         currently not used by this method
 * @param record        record object providing the id
 * @param digitalObject currently not used by this method
 * @return the concept wrapped in an [[ExtractionResult]];
 *         the `.get` below fails if no record id can be extracted
 */
def createConcept(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Concept] = {
  val conceptId = Extractors.recordId(record).get
  val concept = new Concept(conceptId)

  // placeholder value until the real mapping is defined
  val placeholderLabel: Option[String] = Some("only test for concept")
  concept.addSkosPrefLabel(placeholderLabel)

  ExtractionResult(concept)
}
/**
 * Builds a placeholder [[Aggregation]] for a record.
 *
 * The aggregation is identified by the record id and receives a fixed
 * test label; nothing is read from `graph` or `digitalObject` yet.
 *
 * TODO (open questions): how many aggregation instances are available
 * per record, and is the record id the correct identifier?
 *
 * @param graph         currently not used by this method
 * @param record        record object providing the id
 * @param digitalObject currently not used by this method
 * @return the aggregation wrapped in an [[ExtractionResult]];
 *         the `.get` below fails if no record id can be extracted
 */
def createAggregation(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Aggregation] = {
  val aggregationId = Extractors.recordId(record).get

  val aggregation = new Aggregation(aggregationId)
  // placeholder value until the real mapping is defined
  aggregation.addSkosPrefLabel(Some("only test for aggregation"))

  ExtractionResult(aggregation)
}
/**
 * Builds a placeholder [[TimeSpan]] for a record.
 *
 * The time span is identified by the record id and receives a fixed
 * test label; nothing is read from `graph` or `digitalObject` yet.
 *
 * TODO (open questions): how many time span instances are available
 * per record, and is the record id the correct identifier?
 *
 * @param graph         currently not used by this method
 * @param record        record object providing the id
 * @param digitalObject currently not used by this method
 * @return the time span wrapped in an [[ExtractionResult]];
 *         the `.get` below fails if no record id can be extracted
 */
def createTimeSpan(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[TimeSpan] = {
  val populated: TimeSpan = {
    val ts = new TimeSpan(Extractors.recordId(record).get)
    // placeholder value until the real mapping is defined
    ts.addSkosPrefLabel(Some("only test for timespan"))
    ts
  }
  ExtractionResult(populated)
}
/*
todo: my assumption: recordset and institution have cardinality n
*/
def extractRecordset(graph: JArr, record: JObj): List[String] = ???
//def extractRecordset(graph: JArr, record: JObj): List[String] = ???
def extractInstitution(graph: JArr, record: JObj): List[String] = ???
//def extractInstitution(graph: JArr, record: JObj): List[String] = ???
}
......
......@@ -156,6 +156,23 @@ object Extractors {
stringValue(v)("name")
)
// TODO: provisional lookup table only - clarify whether this should come
// from a configuration, or whether such a configuration already exists.
// Keys are lower-case type names, values are the strings handed out as EDM type.
private val edmTypeMapping: Map[String, String] = Map(
  ("film", "VIDEO"),
  ("foto", "IMAGE"),
  ("radio", "SOUND"),
  ("ton", "SOUND"),
  // no target type decided yet for audio-visual shows
  ("tonbildschau", "no mapping for audio-visual show"),
  ("tv", "VIDEO"),
  ("video", "VIDEO")
)
/**
 * Derives the EDM type string from the `type` value of a record.
 *
 * The value is lower-cased and looked up in `edmTypeMapping`. If the
 * lookup misses, a diagnostic string naming the unmapped value is
 * returned instead, so the caller still gets a `Some`.
 *
 * @return `None` when `stringValue(record)("type")` yields nothing,
 *         otherwise the mapped (or diagnostic) string
 */
val edmType
  : mutable.LinkedHashMap[String, JValue] => Option[String] =
  record =>
    stringValue(record)("type")
      .map { ricoType =>
        // Locale.ROOT keeps the lookup key independent of the JVM default
        // locale (e.g. under a Turkish locale "FILM" would otherwise become
        // "fılm" with a dotless i and miss the mapping).
        val lookupKey = ricoType.toLowerCase(java.util.Locale.ROOT)
        edmTypeMapping.getOrElse(lookupKey, s"no mapping for ${ricoType} as rico:type")
      }
val publishedByGH
: JArr => mutable.LinkedHashMap[String, JValue] => List[String] =
graph =>
......
......@@ -23,6 +23,7 @@ package ch.memobase.edm.subjects
import ch.memobase.rdf.writer.RdfXmlWriter
import org.eclipse.rdf4j.model.Model
import org.eclipse.rdf4j.rio.Rio
import org.eclipse.rdf4j.rio.rdfxml.RDFXMLWriter
import java.io.StringWriter
import java.time.ZonedDateTime
......@@ -43,7 +44,9 @@ object ModelXMLTransformer {
): String = {
val sOut = new StringWriter
//we use the adapted Writer for EDM
val rdfWriter = new RdfXmlWriter(sOut)
//val rdfWriter = new RDFXMLWriter(sOut)
Rio.write(model, rdfWriter)
//create whole ES structure and replace XML prolog
......
......@@ -21,18 +21,18 @@
package ch.memobase.edm.subjects
import ch.memobase.rdf.Helper
import ch.memobase.rdf.vocabularies.EDM
import ch.memobase.rdf.vocabularies.{EDMVocab, OreVocab}
import org.eclipse.rdf4j.model.Model
import org.eclipse.rdf4j.model.impl.SimpleValueFactory
import org.eclipse.rdf4j.model.util.Values.iri
import org.eclipse.rdf4j.model.vocabulary.{DC, DCTERMS, RDF}
import org.eclipse.rdf4j.model.vocabulary.{DC, DCTERMS, RDF, SKOS}
class ProvidedCHO (val id: String) {
import org.eclipse.rdf4j.model.util.Values.iri
//import org.eclipse.rdf4j.model.util.Values.literal
private val model = Helper.getModelWithEDMNamespaces
model.add(iri(id),RDF.TYPE, EDM.ProvidedCHO)
model.add(iri(id),RDF.TYPE, EDMVocab.ProvidedCHO)
private val factory = SimpleValueFactory.getInstance()
......@@ -92,17 +92,83 @@ class ProvidedCHO (val id: String) {
/**
 * Adds a `dcterms:temporal` literal for this resource to the model.
 *
 * @param temporal value to add; `None` leaves the model unchanged
 */
def addDcTermsTemporal(temporal:Option[String]): Unit =
  // foreach instead of map: the call is made only for its side effect
  temporal.foreach(t => model.add(iri(id),DCTERMS.TEMPORAL,factory.createLiteral(t)))
/**
 * Adds an `EDMVocab.TYPE` literal for this resource to the model.
 *
 * @param edmtype value to add; `None` leaves the model unchanged
 */
def addEdmType(edmtype:Option[String]): Unit =
  // foreach instead of map: the call is made only for its side effect
  edmtype.foreach(t => model.add(iri(id),EDMVocab.TYPE,factory.createLiteral(t)))
/** Returns the model instance that the `add...` methods of this object write to. */
def getModel: Model = model
}
class WebResource (val id: String) {
private val model = Helper.getModelWithEDMNamespaces
model.add(iri(id),RDF.TYPE, EDM.WebResource)
private val factory = SimpleValueFactory.getInstance()
model.add(iri(id),RDF.TYPE, EDMVocab.WebResource)
def getModel: Model = model
}
/**
 * Holder of the RDF statements describing one `OreVocab.AGGREGATION` resource.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the statement `<id> rdf:type OreVocab.AGGREGATION`.
 *
 * @param id string that is turned into the subject IRI via `iri(id)`
 */
class Aggregation(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  model.add(iri(id), RDF.TYPE, OreVocab.AGGREGATION)

  private val factory = SimpleValueFactory.getInstance()

  /**
   * Adds a `skos:prefLabel` literal for this resource.
   *
   * @param skospreflabel label to add; `None` leaves the model unchanged
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach instead of map: the call is made only for its side effect
    skospreflabel.foreach(t => model.add(iri(id), SKOS.PREF_LABEL, factory.createLiteral(t)))

  /** Returns the model instance the statements of this resource are written to. */
  def getModel: Model = model
}
class Aggregation
/**
 * Holder of the RDF statements describing one `skos:Concept` resource.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the statement `<id> rdf:type skos:Concept`.
 *
 * @param id string that is turned into the subject IRI via `iri(id)`
 */
class Concept(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  model.add(iri(id), RDF.TYPE, SKOS.CONCEPT)

  private val factory = SimpleValueFactory.getInstance()

  /**
   * Adds a `skos:prefLabel` literal for this resource.
   *
   * @param skospreflabel label to add; `None` leaves the model unchanged
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach instead of map: the call is made only for its side effect
    skospreflabel.foreach(t => model.add(iri(id), SKOS.PREF_LABEL, factory.createLiteral(t)))

  /** Returns the model instance the statements of this resource are written to. */
  def getModel: Model = model
}
/**
 * Holder of the RDF statements describing one `EDMVocab.TIMESPAN` resource.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the statement `<id> rdf:type EDMVocab.TIMESPAN`.
 *
 * Author's note: not used so far in Silvia's mapping.
 *
 * @param id string that is turned into the subject IRI via `iri(id)`
 */
class TimeSpan(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  model.add(iri(id), RDF.TYPE, EDMVocab.TIMESPAN)

  private val factory = SimpleValueFactory.getInstance()

  /**
   * Adds a `skos:prefLabel` literal for this resource.
   *
   * @param skospreflabel label to add; `None` leaves the model unchanged
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach instead of map: the call is made only for its side effect
    skospreflabel.foreach(t => model.add(iri(id), SKOS.PREF_LABEL, factory.createLiteral(t)))

  /** Returns the model instance the statements of this resource are written to. */
  def getModel: Model = model
}
/**
 * Holder of the RDF statements describing one `EDMVocab.PLACE` resource.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the statement `<id> rdf:type EDMVocab.PLACE`.
 *
 * Author's note: not used so far in Silvia's mapping.
 * NOTE(review): this note may be a copy-paste leftover - confirm.
 *
 * @param id string that is turned into the subject IRI via `iri(id)`
 */
class Place(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  model.add(iri(id), RDF.TYPE, EDMVocab.PLACE)

  private val factory = SimpleValueFactory.getInstance()

  /**
   * Adds a `skos:prefLabel` literal for this resource.
   *
   * @param skospreflabel label to add; `None` leaves the model unchanged
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach instead of map: the call is made only for its side effect
    skospreflabel.foreach(t => model.add(iri(id), SKOS.PREF_LABEL, factory.createLiteral(t)))

  /** Returns the model instance the statements of this resource are written to. */
  def getModel: Model = model
}
......@@ -29,11 +29,22 @@ sealed abstract class VocabularyFactory(val NAMESPACE: String) {
}
object EDM extends VocabularyFactory("http://www.europeana.eu/schemas/edm/") {
/**
 * IRI constants for the Europeana Data Model vocabulary.
 *
 * The namespace `http://www.europeana.eu/schemas/edm/` is handed to
 * `VocabularyFactory`; each constant is obtained from the inherited `getIri`
 * with the term's local name (presumably resolved against that namespace -
 * TODO confirm in `VocabularyFactory`).
 */
object EDMVocab extends VocabularyFactory("http://www.europeana.eu/schemas/edm/") {
// classes used as rdf:type objects
val ProvidedCHO: IRI = getIri("ProvidedCHO")
val WebResource: IRI = getIri("WebResource")
val TIMESPAN: IRI = getIri("TimeSpan")
val PLACE: IRI = getIri("Place")
// property used as predicate for the EDM type literal
val TYPE: IRI = getIri("type")
}
/**
 * IRI constants for the OAI-ORE terms vocabulary.
 *
 * The namespace `http://www.openarchives.org/ore/terms/` is handed to
 * `VocabularyFactory`; the constant is obtained from the inherited `getIri`.
 */
object OreVocab extends VocabularyFactory("http://www.openarchives.org/ore/terms/") {
// class used as rdf:type object for aggregations
val AGGREGATION: IRI = getIri("Aggregation")
}
......@@ -316,6 +316,13 @@ class CHOSpec extends AnyFunSuite with Matchers{
}
// Checks that the `type` value of the P60441 contributor fixture is mapped to "SOUND".
test ("edm type") {
  val graph = Extractors.jsonGraph(ricoContributorP60441).get.arr
  val record = Extractors.record(graph)
  val edmType = Extractors.edmType(record.get)
  // Comparing the whole Option avoids Option.get and makes a failure
  // report the value that was actually extracted.
  edmType shouldBe Some("SOUND")
}
}
/*
* rico2edm
* Copyright (C) 2021 UB Basel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package ch.memobase.edm
import ch.memobase.Utils.loadFile
import ch.memobase.edm.subjects.Aggregation
import ch.memobase.rdf.writer.RdfXmlWriter
import org.eclipse.rdf4j.rio.Rio
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import java.io.StringWriter
import scala.util.{Failure, Success, Try}
/**
 * Tests for the EDM document creation based on the
 * `raw.contributor.json` fixture.
 *
 * Both tests print the generated output so it can be inspected manually.
 */
class EDMSpec extends AnyFunSuite with Matchers {

  private lazy val ricoWithContributors = loadFile("src/test/resources/raw.contributor.json")

  test ("complete EDM creation") {
    val edm = new EDM
    val edmCreationResult: Try[ExtractionResult[(String, String)]] = edm.create(ricoWithContributors)

    edmCreationResult match {
      case Success(value) =>
        println(value.obj._1)
        println(value.obj._2)
      case Failure(exception) =>
        // Fail with the original exception as cause. A preceding
        // assert(isSuccess) would abort the test before the cause
        // could ever be reported.
        fail("EDM creation failed", exception)
    }
  }

  test("aggregation creation" ) {
    val graph = Extractors.jsonGraph(ricoWithContributors).get.arr
    val record = Extractors.record(graph)
    val recordId = Extractors.recordId(record.get).get
    val agg = new Aggregation(recordId)

    val sOut = new StringWriter
    val rdfWriter = new RdfXmlWriter(sOut)
    Rio.write(agg.getModel, rdfWriter)

    val rdfXml = sOut.toString
    println(rdfXml)
    // the model holds at least the rdf:type statement, so something must be written
    assert(rdfXml.nonEmpty)
  }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment