Commit f301a47c authored by Jonas Waeber
Browse files

Update RecordSet import.

parent e14e1195
Pipeline #21321 failed with stages
in 2 minutes and 54 seconds
......@@ -35,7 +35,7 @@ dependencies {
// https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client
//compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.1.0'
implementation 'org.memobase:memobase-service-utilities:2.0.5'
implementation 'org.memobase:memobase-service-utilities:2.0.7'
// Logging Framework
implementation "org.apache.logging.log4j:log4j-api:${log4jV}"
......
......@@ -18,6 +18,7 @@
package org.memobase
import ch.memobase.rdf.DC
import ch.memobase.rdf.MB
import ch.memobase.rdf.NS
import ch.memobase.rdf.RDA
......@@ -25,17 +26,19 @@ import ch.memobase.rdf.RDF
import ch.memobase.rdf.RICO
import ch.memobase.rdf.SCHEMA
import ch.memobase.rdf.WD
import ch.memobase.rdf.XSD
import java.util.Properties
import org.apache.jena.datatypes.RDFDatatype
import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.Model
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.Property
import org.apache.jena.rdf.model.RDFNode
import org.apache.jena.rdf.model.Resource
import org.apache.jena.rdf.model.ResourceFactory
import org.apache.logging.log4j.LogManager
import org.memobase.model.Address
import org.memobase.model.Institution
import org.memobase.model.Link
import org.memobase.model.RecordSet
import org.memobase.model.RichText
......@@ -65,9 +68,12 @@ class RdfTransformer(properties: Properties) {
resource.addProperty(RICO.name, langLiteral(input.title, "de"))
resource.addProperty(RICO.name, langLiteral(input.title_fr, "fr"))
resource.addProperty(RICO.name, langLiteral(input.title_it, "it"))
addIfNotNull(resource, input.field_text, "de")
addIfNotNull(resource, input.field_text_fr, "fr")
addIfNotNull(resource, input.field_text_it, "it")
/* Description */
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_text, input.field_text_fr, input.field_text_it),
RICO.descriptiveNote
)
input.field_address.forEach { address ->
val location = generateLocationResource(model, address)
......@@ -100,38 +106,38 @@ class RdfTransformer(properties: Properties) {
fun createRecordSet(input: RecordSet): Pair<String, Model> {
val model = ModelFactory.createDefaultModel()
val resource = model.createResource(NS.mbrs + input.field_memobase_id)
resource.addProperty(RDF.type, RICO.RecordSet)
// Publikations Status
resource.addLiteral(MB.isPublished, input.status)
resource.addProperty(RICO.identifiedBy, addIdentifier(model, "main", input.field_memobase_id))
if (input.field_old_memobase_id != null)
resource.addProperty(RICO.identifiedBy, addIdentifier(model, "oldMemobase", input.field_old_memobase_id))
input.field_institution.forEach {
resource.addProperty(RICO.heldBy, NS.mbcb + it)
}
// Beschreibung
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_text, input.field_text_fr, input.field_text_it),
RICO.descriptiveNote
)
// 0.1 Titel (Memobase)
addTitle(resource, model, "main", listOf(input.title, input.title_fr, input.title_it))
// + convenience label on the resource directly.
resource.addProperty(RICO.title, langLiteral(input.title, "de"))
resource.addProperty(RICO.title, langLiteral(input.title_fr, "fr"))
resource.addProperty(RICO.title, langLiteral(input.title_it, "it"))
addIfNotNull(resource, input.field_text, "de")
addIfNotNull(resource, input.field_text_fr, "fr")
addIfNotNull(resource, input.field_text_it, "it")
input.field_metadata_language_codes.forEach {
// rico:hasLanguage metadata
resource.addProperty(RICO.hasLanguage, addLanguage(model, it))
}
// rico:hasTitle main
resource.addProperty(RICO.hasTitle, addTitle(model, input.title, input.title_fr, input.title_it))
resource.addProperty(RDA.hasSponsoringAgentOfResource, model.createResource(Util.memoriavUri))
// 0.2.1 Thumbnail Image
if (input.computed_teaser_image_url != null)
resource.addProperty(WD.image, literal(input.computed_teaser_image_url))
// 1.1 Inhalt
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_content, input.field_content_fr, input.field_content_it),
RICO.scopeAndContent
)
// 1.2 Entstehungszeitraum
// is expected to always be a normalized value YYYY/YYYY.
input.field_time_period.let {
if (it != null) {
val date = model.createResource()
......@@ -140,38 +146,177 @@ class RdfTransformer(properties: Properties) {
resource.addProperty(RICO.isAssociatedWithDate, date)
}
}
// 1.3 Sprache
addLiteralIfNotNull(
resource,
listOf(input.field_language_de, input.field_language_fr, input.field_language_it),
RDA.hasLanguageOfResource
)
// 1.4 Zugang Memobase
// Why is this implemented as a list?
/*
addIfNotNull(
resource,
listOf(input.field_access_memobase, input.field_access_memobase_fr, input.field_access_memobase_it),
RICO.conditionsOfAccess
)
*/
// 2.1 Kontext
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_context, input.field_context_fr, input.field_context_it),
RICO.history
)
// 3.1 Titel
// Originaltitle des Bestandes
addTitle(
resource,
model,
"original",
listOf(input.field_original_title, input.field_original_title_fr, input.field_original_title_it)
)
// 3.2 Umfang -> recordResourceExtent
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_scope, input.field_scope_fr, input.field_scope_it),
RICO.recordResourceExtent
)
// 3.3 Auswahl / Vollständigkeit
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_selection_de, input.field_selection_fr, input.field_selection_it),
RICO.integrity
)
// 3.4 Informationen zur Erschliessung
addRichTextLiteralIfNotNull(
resource,
listOf(
input.field_info_on_development_de,
input.field_info_on_development_fr,
input.field_info_on_development_it
),
DC.conformsTo
)
// 3.5 Sprache Metadaten Records
input.field_metadata_language_codes.forEach {
// rico:hasLanguage metadata
resource.addProperty(RICO.hasLanguage, addLanguage(model, it))
}
// 3.6 Rechte
addRichTextLiteralIfNotNull(
resource,
listOf(
input.field_rights_de,
input.field_rights_fr,
input.field_rights_it
),
RICO.conditionsOfUse
)
// 3.7 Original ID
// addIdentifier returns the identifier node; like the "main" identifier it has to be
// linked to the record set through rico:identifiedBy, otherwise it stays unconnected.
input.field_original_id?.let { originalId ->
    resource.addProperty(RICO.identifiedBy, addIdentifier(model, "original", originalId))
}
// 3.8 Original shelf mark
input.field_original_shelf_mark?.let { shelfMark ->
    resource.addProperty(RICO.identifiedBy, addIdentifier(model, "callNumber", shelfMark))
}
// 3.9 Beschreibung (Text)
// 3.9.1 Beschreibung (Link)
addOriginalRecordSetLink(
model,
resource,
listOf(
input.field_original_description,
input.field_original_description_fr,
input.field_original_description_it
)
)
// 4.1 Zugang
// Why is this implemented as a list?
/*
addIfNotNull(
resource,
listOf(input.field_access, input.field_access_fr, input.field_access_it),
RICO.conditionsOfAccess
)
*/
// 4.2 Zuständige Institution (Original)
addRelatedInstitution(model, resource, "original", input.field_resp_institution_original)
// 4.3 Zuständige Institution (Master)
addRelatedInstitution(model, resource, "master", input.field_resp_institution_master)
// 4.4 Zuständige Institution (Access)
addRelatedInstitution(model, resource, "access", input.field_resp_institution_access)
// Hard to deal with because these are lists. How to ensure that the correct translations are attached to each other?
// sometimes two documents have a different language each but no translations.
// 5.1 Projekt (Titel)
// 5.2 Projekt (Link)
// rdau:P60451 (RDA:hasSponsoringAgentOfResource)
// ---
// rico:CorporateBody
// rico:type "memoriavProject"
// rico:title
// schema:sameAs
// 5.3 Verwandte Bestände (Titel)
// 5.4 Verwandte Bestände (Link)
// rico:isRecordResourceAssociatedWithRecordResource
// ---
// rico:RecordSet
// rico:title
// schema:sameAs
// 5.5 Publikationen (Titel)
// 5.6 Publikationen (Link)
// rico:isSubjectOf
// ---
// rico:Record
// rico:title
// schema:sameAs
// 5.7 Dokumente (Titel)
// 5.8 Dokumente (Link)
// rico:isRecordResourceAssociatedWithRecordResource
// ---
// rico:Record
// rico:title
// schema:sameAs
// 6.1 Datenübernahme
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_data_transfer, input.field_data_transfer_fr, input.field_data_transfer_it),
RICO.descriptiveNote
)
// 6.2 Datum der Übernahme in Memobase
input.field_transfer_date.let {
if (it != null) {
val date = it.split("T")[0]
val literal = model.createTypedLiteral(date, XSD.date)
resource.addLiteral(RICO.publicationDate, literal)
}
}
// 6.3 Datum letzte Aktualisierung in Memobase
val literal = model.createTypedLiteral(Util.now, XSD.dateTime)
resource.addLiteral(RICO.modificationDate, literal)
// 6.4 Memobase ID
resource.addProperty(RICO.identifiedBy, addIdentifier(model, "main", input.field_memobase_id))
// rico:scopeAndContent -> Inhalt
// rico:history -> Kontext
// rico:integrity -> Auswahl / Vollständigkeit
// dct:conformsTo -> Informationen zur Erschliessung
// rico:Identifier callNumber -> Original-Signatur des Bestands
// dct:created
// rico:type -> Dokumenttyp
// rico:descriptiveNote -> Datenübernahme
// rico:Title original
// rico:isRecordResourceAssociatedWithRecordResource -> Dokumente rico:Record ( sameAs + title) | Verwandte Bestände rico:RecordSet
// Originale Bestandesbeschreibung -> rico:RecordSet -> rico:hasSource
// rico:recordResourceOrInstantiationIsTargetOfRecordResourceHoldingRelation
// rico:recordResourceExtent -> Umfang
// rico:isSubjectOf -> publications rico:Record (sameAs + title)
// rico:conditionsOfUse -> Rechte
// rico:conditionsOfAccess -> Zugang
// rico:publicationDate -> Datum der Übernahme in Memobase
// rico:modificationDate (is enriched when indexed, is that enough? Should I do this here?).
// hasTitle original-> Originaltitel des Bestands
// schema:sameAs -> Projektname und Beschreibung | Publikationen
// rdau:P60099 has language of resource -> What is this supposed to do?
// rdau:P60470 has note on resource -> Hinweise
// rdau:P60848 has referential resource relationship with
// 8.1 Unterstützt durch Memoriav
resource.addProperty(RDA.hasSponsoringAgentOfResource, model.createResource(Util.memoriavUri))
// 8.2 Institution
input.field_institution.forEach {
resource.addProperty(RICO.heldBy, NS.mbcb + it)
}
// 9.3 Alte Memobase ID
if (input.field_old_memobase_id != null)
resource.addProperty(RICO.identifiedBy, addIdentifier(model, "oldMemobase", input.field_old_memobase_id))
return Pair(resource.uri, model)
}
......@@ -183,7 +328,6 @@ class RdfTransformer(properties: Properties) {
return identifier
}
// TODO: Add language translations.
private fun addLanguage(model: Model, value: String): Resource {
val language = model.createResource()
language.addProperty(RDF.type, RICO.Language)
......@@ -201,14 +345,17 @@ class RdfTransformer(properties: Properties) {
return language
}
private fun addTitle(model: Model, de: String, fr: String, it: String): Resource {
/**
 * Creates a rico:Title node of the given [type] and attaches it to [resource] via rico:hasTitle.
 *
 * [titles] is read positionally: the index of each entry is mapped to a language tag by
 * getLanguage, and null entries are skipped. When every entry is null nothing is created.
 */
private fun addTitle(resource: Resource, model: Model, type: String, titles: List<String?>) {
// Nothing to add when no language variant of the title is present.
if (titles.all { it == null })
return
val language = model.createResource()
language.addProperty(RDF.type, RICO.Title)
// NOTE(review): the next five statements use `de`, `fr` and `it`, which are not parameters of
// this signature, hard-code the type "main" and return a value. They look like leftovers of
// the earlier addTitle(model, de, fr, it) variant — confirm and remove.
language.addProperty(RICO.type, literal("main"))
language.addProperty(RICO.title, langLiteral(de, "de"))
language.addProperty(RICO.title, langLiteral(fr, "fr"))
language.addProperty(RICO.title, langLiteral(it, "it"))
return language
language.addProperty(RICO.type, literal(type))
// One rico:title literal per available translation, tagged by its position in the list.
titles.forEachIndexed { index, s ->
if (s != null)
language.addProperty(RICO.title, langLiteral(s, getLanguage(index)))
}
resource.addProperty(RICO.hasTitle, language)
}
private fun generateLocationResource(model: Model, address: Address): Resource {
......@@ -276,17 +423,55 @@ class RdfTransformer(properties: Properties) {
/**
 * Builds a plain literal from [text] with leading and trailing whitespace removed.
 */
private fun literal(text: String): Literal {
    val trimmed = text.trim()
    return ResourceFactory.createPlainLiteral(trimmed)
}
private fun addIfNotNull(resource: Resource, field: RichText?, language: String) {
field.let {
if (it != null)
resource.addProperty(RICO.descriptiveNote, langLiteral(it.value, language))
/**
 * Adds the value of every non-null rich text in [field] to [resource] under [property].
 *
 * The position of an entry inside [field] selects its language tag through getLanguage;
 * null entries are skipped without affecting the positions of the remaining ones.
 */
private fun addRichTextLiteralIfNotNull(resource: Resource, field: List<RichText?>, property: Property) {
    for ((position, richText) in field.withIndex()) {
        if (richText == null) continue
        resource.addProperty(property, langLiteral(richText.value, getLanguage(position)))
    }
}
private fun addIfNotNull(resource: Resource, field: String?, property: Property, node: RDFNode) {
field.let {
if (it != null)
resource.addProperty(property, node)
/**
 * Maps the position of a translation inside a list to its language tag.
 *
 * @param index zero-based position of the translation.
 * @return "de", "fr" or "it" for the positions 0, 1 and 2, and "un" for every other position.
 */
private fun getLanguage(index: Int): String {
    // NOTE(review): "un" is not the BCP 47 subtag for an undetermined language ("und") — confirm intent.
    return listOf("de", "fr", "it").getOrElse(index) { "un" }
}
/**
 * Adds every non-null string in [fields] to [resource] under [property] as a language-tagged literal.
 *
 * The language tag is derived from the position of the string inside [fields] via getLanguage.
 */
private fun addLiteralIfNotNull(resource: Resource, fields: List<String?>, property: Property) {
    fields.withIndex().forEach { (position, text) ->
        text?.let { resource.addProperty(property, langLiteral(it, getLanguage(position))) }
    }
}
/**
 * Describes the original record set referenced by [links] and attaches it to [resource]
 * through rico:hasSource.
 *
 * A single blank node typed rico:RecordSet is created. For every non-null link it receives
 * the link's uri as schema:sameAs and, when present, the link's title as rico:title; both
 * literals carry the language tag that getLanguage returns for the link's position.
 * Nothing is added when all entries of [links] are null.
 */
private fun addOriginalRecordSetLink(model: Model, resource: Resource, links: List<Link?>) {
    // No language variant carries a link: leave the model untouched.
    if (links.none { it != null }) return

    val originalRecordSet = model.createResource()
    originalRecordSet.addProperty(RDF.type, RICO.RecordSet)
    for ((position, entry) in links.withIndex()) {
        if (entry == null) continue
        originalRecordSet.addLiteral(SCHEMA.sameAs, langLiteral(entry.uri, getLanguage(position)))
        entry.title?.let { title ->
            originalRecordSet.addLiteral(RICO.title, langLiteral(title, getLanguage(position)))
        }
    }
    resource.addProperty(RICO.hasSource, originalRecordSet)
}
/**
 * Links [resource] to each institution in [item] through a rico:RecordResourceHoldingRelation node.
 *
 * For every institution id a blank node is created that carries [type] as rico:type, points
 * to [resource] as the relation target and to the institution as the relation source. The
 * node is then attached to [resource].
 *
 * @param model model in which the relation nodes are created.
 * @param resource record set that is the target of the holding relation.
 * @param type kind of responsibility; the callers pass "original", "master" or "access".
 * @param item institution ids; an empty list adds nothing.
 */
private fun addRelatedInstitution(model: Model, resource: Resource, type: String, item: List<String>) {
    item.forEach { institutionId ->
        val relation = model.createResource()
        relation.addProperty(RDF.type, RICO.RecordResourceHoldingRelation)
        relation.addProperty(RICO.type, type)
        relation.addProperty(RICO.recordResourceHoldingRelationHasTarget, resource)
        // Institutions are identified in the NS.mbcb namespace (as for rico:heldBy and the
        // Memoriav URI). NS.mbrs is the record set namespace and yielded a record set URI here.
        relation.addProperty(RICO.recordResourceHoldingRelationHasSource, NS.mbcb + institutionId)
        resource.addProperty(RICO.recordResourceOrInstantiationIsTargetOfRecordResourceHoldingRelation, relation)
    }
}
}
\ No newline at end of file
......@@ -22,6 +22,8 @@ import java.io.File
import java.io.FileInputStream
import java.io.FileNotFoundException
import java.io.StringWriter
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import kotlin.system.exitProcess
import org.apache.jena.rdf.model.Model
import org.apache.jena.riot.RDFDataMgr
......@@ -34,7 +36,7 @@ object Util {
const val languageSourceFilePathPropertyName = "path.languages"
const val memoriavUri = NS.mbcb + "mrv"
/**
 * Current local date-time formatted with [DateTimeFormatter.ISO_DATE_TIME].
 *
 * Computed on every access. A plain initializer would be evaluated only once, when the
 * enclosing object is initialized, and every later read would return that stale value.
 */
val now: String
    get() = LocalDateTime.now().format(DateTimeFormatter.ISO_DATE_TIME)
private val wikidataNamespace = "http://www.wikidata.org/entity/"
private val log = LogManager.getLogger("DrupalSyncHelpers")
......
......@@ -18,24 +18,109 @@
package org.memobase.model
/**
 * Input model for a record set.
 *
 * The numbered comments follow the section numbers used in the comments of
 * RdfTransformer.createRecordSet. Translated fields come as a group of three: the
 * unsuffixed (or `_de`) field is German, `_fr` is French and `_it` is Italian.
 *
 * NOTE(review): several parameters are declared twice in this list (`status`,
 * `computed_teaser_image_url`, `field_time_period`, `field_metadata_language_codes`,
 * `field_old_memobase_id`). The repeated declarations look like leftovers of the previous
 * version of this class — confirm which declaration of each is meant to stay.
 */
data class RecordSet(
/* Publication status */
val status: Boolean,
/* 0.1 Title (Memobase) */
val title: String,
val title_fr: String,
val title_it: String,
// NOTE(review): second declaration of `status` (see the top of the list) — confirm and drop one.
val status: Boolean,
/* 0.2.1 Thumbnail (search result) */
val computed_teaser_image_url: String?,
/* 1.1 Content */
val field_content: RichText?,
val field_content_fr: RichText?,
val field_content_it: RichText?,
/* 1.2 Period of creation */
val field_time_period: String?,
/* 1.3 Language */
val field_language_de: String?,
val field_language_fr: String?,
val field_language_it: String?,
/* 1.4 Access (Memobase) */
val field_access_memobase: List<RichText>,
val field_access_memobase_fr: List<RichText>,
val field_access_memobase_it: List<RichText>,
/* 2.1 Context */
val field_context: RichText?,
val field_context_fr: RichText?,
val field_context_it: RichText?,
/* 3.1 Title (original) */
val field_original_title: String?,
val field_original_title_fr: String?,
val field_original_title_it: String?,
/* 3.2 Extent */
val field_scope: RichText?,
val field_scope_fr: RichText?,
val field_scope_it: RichText?,
/* 3.3 Selection */
val field_selection_de: RichText?,
val field_selection_fr: RichText?,
val field_selection_it: RichText?,
/* 3.4 Information on the indexing (Erschliessung) */
val field_info_on_development_de: RichText?,
val field_info_on_development_fr: RichText?,
val field_info_on_development_it: RichText?,
/* 3.5 Metadata language */
val field_metadata_language_codes: List<String>,
/* 3.6 Rights */
val field_rights_de: RichText?,
val field_rights_fr: RichText?,
val field_rights_it: RichText?,
/* 3.7 Original identifier */
val field_original_id: String?,
/* 3.8 Shelf mark of the record set */
val field_original_shelf_mark: String?,
/* 3.9 Description */
val field_text: RichText?,
val field_text_fr: RichText?,
val field_text_it: RichText?,
/* 3.9 Original record set description (title) */
/* 3.9.1 Original record set description (link) */
val field_original_description: Link?,
val field_original_description_it: Link?,
val field_original_description_fr: Link?,
/* 4.1 Access */
val field_access: List<RichText>,
val field_access_fr: List<RichText>,
val field_access_it: List<RichText>,
/* 4.2 Original institution */
val field_resp_institution_original: List<String>,
/* 4.3 Master institution */
val field_resp_institution_master: List<String>,
/* 4.4 Access institution */
val field_resp_institution_access: List<String>,
/* 5.1 Project name */
/* 5.2 Project description (link) */
val field_project: List<Link>,
val field_project_fr: List<Link>,
val field_project_it: List<Link>,
/* 5.3 Related record sets (title) */
/* 5.4 Related record sets (link) */
val field_related_record_sets: List<Link>,
val field_related_record_sets_fr: List<Link>,
val field_related_record_sets_it: List<Link>,
/* 5.5 Publication (title) */
/* 5.6 Publication (link) */
val field_publications: List<Link>,
val field_publications_fr: List<Link>,
val field_publications_it: List<Link>,
/* 5.7 Related documents (title) */
/* 5.8 Related documents (link) */
val field_documents: Link?,
val field_documents_fr: Link?,
val field_documents_it: Link?,
/* 6.1 Data transfer */
val field_data_transfer: RichText?,
val field_data_transfer_fr: RichText?,
val field_data_transfer_it: RichText?,
/* 6.2 Transfer date (YYYY-MM-DD) */
val field_transfer_date: String?,
/* 6.4 Memobase ID (new) */
val field_memobase_id: String,
// NOTE(review): `field_old_memobase_id` is declared again as the last parameter (without a
// default value) — confirm and drop one.
val field_old_memobase_id: String? = null,
/* 8.1 Supported by Memoriav */
val field_supported_by_memoriav: Boolean,
// NOTE(review): second declaration of `field_metadata_language_codes` (see 3.5) — confirm and drop one.
val field_metadata_language_codes: List<String>,
/* 8.2 Institution */
val field_institution: List<String>,
// NOTE(review): second declarations of `computed_teaser_image_url` (see 0.2.1) and
// `field_time_period` (see 1.2); the latter also lacks a separating comma — confirm and drop.
val computed_teaser_image_url: String? = null,
val field_time_period: String?
/* 9.3 Memobase ID (old) */
val field_old_memobase_id: String?
) : Input("RecordSet")
\ No newline at end of file
......@@ -71,7 +71,7 @@ class TestRecordSets {
assertAll("",
{
assertThat(result.first)
.isEqualTo("https://memobase.ch/recordSet/sap-021")
.isEqualTo("https://memobase.ch/recordSet/testComplete")
}
)
}
......@@ -114,7 +114,7 @@ class TestRecordSets {
},
{
assertThat(key)
.isEqualTo("https://memobase.ch/recordSet/sap-021")
.isEqualTo("https://memobase.ch/recordSet/testComplete")
}
)
}
......
......@@ -22,28 +22,30 @@
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
mbrs:sap-021 a rico:RecordSet ;
mbrs:testComplete a rico:RecordSet ;
rdau:P60451 mbcb:mrv ;
internal:isPublished true ;
wdt:P18 "https://mb-wf1.memobase.unibas.ch/sites/default/files/styles/teaser/public/2020-11/StadtArchivSchaffhausenGeb2.jpg?itok=2PsMvPqc" ;
internal:isPublished false ;
rico:descriptiveNote "<p>Beschreibung</p>\r\n\r\n<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"@de , "<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"@fr , "<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"@it ;
rico:hasLanguage [ a rico:Language ;
schema:sameAs "http://www.wikidata.org/entity/Q188" ;
rico:name "Deutsch"@de , "Allemand"@fr , "Tedesco"@it ;
rico:type "metadata"
] ;
rico:hasTitle [ a rico:Title ;
rico:title "Videobestand Marie-Jane Otth"@de , "Videobestand Marie-Jane Otth"@fr , "Videobestand Marie-Jane Otth"@it ;
rico:title "Complete Example"@de , "Complete Example"@fr , "Complete Example"@it ;
rico:type "main"
] ;