Commit a2206e08 authored by Jonas Waeber's avatar Jonas Waeber
Browse files

Add additional fields to record set.

parent f301a47c
......@@ -32,10 +32,7 @@ ext {
}
dependencies {
// https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client
//compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.1.0'
implementation 'org.memobase:memobase-service-utilities:2.0.7'
implementation 'org.memobase:memobase-service-utilities:2.0.8'
// Logging Framework
implementation "org.apache.logging.log4j:log4j-api:${log4jV}"
......
......@@ -28,8 +28,18 @@ spec:
volumeMounts:
- name: metadata-language-labels
mountPath: "/configs/languages/"
- name: canton-labels
mountPath: "/configs/cantons/"
- name: municipality-labels
mountPath: "/configs/municipalities/"
volumes:
- name: metadata-language-labels
configMap:
name: "{{ .Values.metadataLanguageLabels }}"
# Volume name must equal the name used by the "canton-labels" volumeMount; a mismatch leaves the mount without a backing volume.
- name: canton-labels
configMap:
name: "{{ .Values.cantonLabels }}"
- name: municipality-labels
configMap:
name: "{{ .Values.municipalityLabels }}"
restartPolicy: Always
......@@ -9,4 +9,8 @@ outputTopic: drupal-sync-output
kafkaConfigs: prod-kafka-bootstrap-servers
metadataLanguageLabels: metadata-language-labels-csv
pathLanguageSource: /configs/languages/metadata_language_labels.csv
\ No newline at end of file
pathLanguageSource: /configs/languages/labels.csv
cantonLabels: canton-labels-csv
pathCantonLabels: /configs/cantons/labels.csv
municipalityLabels: municipality-labels-csv
# NOTE(review): "Municipaltiy" looks like a misspelling of "Municipality"; any template reading this key must be renamed together with it — confirm before changing.
pathMunicipaltiyLabels: /configs/municipalities/labels.csv
\ No newline at end of file
......@@ -10,10 +10,13 @@ object JSON {
private val log = LogManager.getLogger("JsonParser")
private val klaxon = Klaxon()
fun parseJson(data: String): List<Input> {
fun parseJson(key: String, data: String): List<Input> {
return try {
val int = klaxon.parse<Input>(StringReader(data))
return if (int == null) emptyList()
return if (int == null) {
log.error("No input parsed for message: $key.")
emptyList()
}
else listOf(int)
} catch (ex: KlaxonException) {
log.error(ex.localizedMessage)
......
......@@ -28,13 +28,13 @@ import org.memobase.model.Institution
import org.memobase.model.RecordSet
class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("DrupalSyncProcessor")
private val log = LogManager.getLogger(this::class.java)
private val transformer = RdfTransformer(settings.appSettings)
fun build(): Topology {
val builder = StreamsBuilder()
val stream = builder.stream<String, String>(settings.inputTopic)
stream
.flatMapValues { value -> JSON.parseJson(value) }
.flatMapValues { readOnlyKey, value -> JSON.parseJson(readOnlyKey, value) }
.flatMapValues { value -> mapJson(value) }
.map { _, value -> Util.writeModel(value.first, value.second) }
.to(settings.outputTopic)
......
......@@ -28,7 +28,6 @@ import ch.memobase.rdf.SCHEMA
import ch.memobase.rdf.WD
import ch.memobase.rdf.XSD
import java.util.Properties
import org.apache.jena.datatypes.RDFDatatype
import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.Model
import org.apache.jena.rdf.model.ModelFactory
......@@ -43,7 +42,7 @@ import org.memobase.model.RecordSet
import org.memobase.model.RichText
class RdfTransformer(properties: Properties) {
private val log = LogManager.getLogger("RdfTransformer")
private val log = LogManager.getLogger(this::class.java)
private val cantons = Util.getCantons()
private val municipalities = Util.getMunicipalities()
......@@ -114,14 +113,14 @@ class RdfTransformer(properties: Properties) {
// Beschreibung
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_text, input.field_text_fr, input.field_text_it),
listOf(input.field_text_de, input.field_text_fr, input.field_text_it),
RICO.descriptiveNote
)
// 0.1 Titel (Memobase)
addTitle(resource, model, "main", listOf(input.title, input.title_fr, input.title_it))
addTitle(resource, model, "main", listOf(input.title_de, input.title_fr, input.title_it))
// + convenience label on the resource directly.
resource.addProperty(RICO.title, langLiteral(input.title, "de"))
resource.addProperty(RICO.title, langLiteral(input.title_de, "de"))
resource.addProperty(RICO.title, langLiteral(input.title_fr, "fr"))
resource.addProperty(RICO.title, langLiteral(input.title_it, "it"))
......@@ -132,7 +131,7 @@ class RdfTransformer(properties: Properties) {
// 1.1 Inhalt
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_content, input.field_content_fr, input.field_content_it),
listOf(input.field_content_de, input.field_content_fr, input.field_content_it),
RICO.scopeAndContent
)
......@@ -154,18 +153,15 @@ class RdfTransformer(properties: Properties) {
)
// 1.4 Zugang Memobase
// Why is this implemented as a list?
/*
addIfNotNull(
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_access_memobase, input.field_access_memobase_fr, input.field_access_memobase_it),
listOf(input.field_access_memobase_de, input.field_access_memobase_fr, input.field_access_memobase_it),
RICO.conditionsOfAccess
)
*/
// 2.1 Kontext
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_context, input.field_context_fr, input.field_context_it),
listOf(input.field_context_de, input.field_context_fr, input.field_context_it),
RICO.history
)
// 3.1 Titel
......@@ -174,12 +170,12 @@ class RdfTransformer(properties: Properties) {
resource,
model,
"original",
listOf(input.field_original_title, input.field_original_title_fr, input.field_original_title_it)
listOf(input.field_original_title_de, input.field_original_title_fr, input.field_original_title_it)
)
// 3.2 Umfang -> recordResourceExtent
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_scope, input.field_scope_fr, input.field_scope_it),
listOf(input.field_scope_de, input.field_scope_fr, input.field_scope_it),
RICO.recordResourceExtent
)
// 3.3 Auswahl / Vollständigkeit
......@@ -200,7 +196,6 @@ class RdfTransformer(properties: Properties) {
)
// 3.5 Sprache Metadaten Records
input.field_metadata_language_codes.forEach {
// rico:hasLanguage metadata
resource.addProperty(RICO.hasLanguage, addLanguage(model, it))
}
// 3.6 Rechte
......@@ -231,21 +226,18 @@ class RdfTransformer(properties: Properties) {
model,
resource,
listOf(
input.field_original_description,
input.field_original_description_de,
input.field_original_description_fr,
input.field_original_description_it
)
)
// 4.1 Zugang
// Why is this implemented as a list?
/*
addIfNotNull(
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_access, input.field_access_fr, input.field_access_it),
listOf(input.field_access_de, input.field_access_fr, input.field_access_it),
RICO.conditionsOfAccess
)
*/
// 4.2 Zuständige Institution (Original)
addRelatedInstitution(model, resource, "original", input.field_resp_institution_original)
// 4.3 Zuständige Institution (Master)
......@@ -253,45 +245,58 @@ class RdfTransformer(properties: Properties) {
// 4.4 Zuständige Institution (Access)
addRelatedInstitution(model, resource, "access", input.field_resp_institution_access)
// Hard to deal with because these are lists. How to ensure that the correct translations are attached to each other?
// sometimes two documents have a different language each but no translations.
// Currently each project link is added as a separate entity.
// 5.1 Projekt (Titel)
// 5.2 Projekt (Link)
// rdau:P60451 (RDA:hasSponsoringAgentOfResource)
// ---
// rico:CorporateBody
// rico:type "memoriavProject"
// rico:title
// schema:sameAs
input.field_project_de.forEach { link ->
addMemoriavProject(resource, model, link, "de")
}
input.field_project_fr.forEach { link ->
addMemoriavProject(resource, model, link, "fr")
}
input.field_project_it.forEach { link ->
addMemoriavProject(resource, model, link, "it")
}
// Currently each record set link is added as a separate entity.
// 5.3 Verwandte Bestände (Titel)
// 5.4 Verwandte Bestände (Link)
// rico:isRecordResourceAssociatedWithRecordResource
// ---
// rico:RecordSet
// rico:title
// schema:sameAs
input.field_related_record_sets_de.forEach { link ->
addRelatedRecordSet(resource, model, link, "de")
}
input.field_related_record_sets_fr.forEach { link ->
addRelatedRecordSet(resource, model, link, "fr")
}
input.field_related_record_sets_it.forEach { link ->
addRelatedRecordSet(resource, model, link, "it")
}
// Currently each publication link is added as a separate entity.
// 5.5 Publikationen (Titel)
// 5.6 Publikationen (Link)
// rico:isSubjectOf
// ---
// rico:Record
// rico:title
// schema:sameAs
input.field_publications_de.forEach { link ->
addRelatedRecord(resource, model, link, "de", RICO.isSubjectOf)
}
input.field_publications_fr.forEach { link ->
addRelatedRecord(resource, model, link, "fr", RICO.isSubjectOf)
}
input.field_publications_it.forEach { link ->
addRelatedRecord(resource, model, link, "it", RICO.isSubjectOf)
}
// Currently each record link is added as a separate entity.
// 5.7 Dokumente (Titel)
// 5.8 Dokumente (Link)
// rico:isRecordResourceAssociatedWithRecordResource
// ---
// rico:Record
// rico:title
// schema:sameAs
// Related documents come from field_documents_* (a single optional Link per language),
// not from the publications lists, which are already emitted with rico:isSubjectOf under 5.5/5.6.
input.field_documents_de?.let { link ->
    addRelatedRecord(resource, model, link, "de", RICO.isRecordResourceAssociatedWithRecordResource)
}
input.field_documents_fr?.let { link ->
    addRelatedRecord(resource, model, link, "fr", RICO.isRecordResourceAssociatedWithRecordResource)
}
input.field_documents_it?.let { link ->
    addRelatedRecord(resource, model, link, "it", RICO.isRecordResourceAssociatedWithRecordResource)
}
// 6.1 Datenübernahme
addRichTextLiteralIfNotNull(
resource,
listOf(input.field_data_transfer, input.field_data_transfer_fr, input.field_data_transfer_it),
listOf(input.field_data_transfer_de, input.field_data_transfer_fr, input.field_data_transfer_it),
RICO.descriptiveNote
)
// 6.2 Datum der Übernahme in Memobase
......@@ -358,6 +363,38 @@ class RdfTransformer(properties: Properties) {
resource.addProperty(RICO.hasTitle, language)
}
/**
 * Describes a Memoriav project as a new resource and links it to [resource].
 *
 * The new resource is typed rico:CorporateBody, carries the rico:type value
 * [Util.memoriavProject], and receives the link URI as schema:sameAs. A rico:title
 * tagged with [language] is added only when the link has a non-empty title.
 * The project is attached via rdau:P60451 (hasSponsoringAgentOfResource).
 *
 * @param resource the record set resource the project is attached to
 * @param model the model used to create the new resource
 * @param link title and URI of the project
 * @param language language tag applied to the title literal
 */
private fun addMemoriavProject(resource: Resource, model: Model, link: Link, language: String) {
    val project = model.createResource()
    project.addProperty(RDF.type, RICO.CorporateBody)
    project.addProperty(RICO.type, Util.memoriavProject)
    // Skip the title entirely when it is missing or empty.
    link.title
        ?.takeIf { it.isNotEmpty() }
        ?.let { title -> project.addProperty(RICO.title, langLiteral(title, language)) }
    project.addProperty(SCHEMA.sameAs, literal(link.uri))
    resource.addProperty(RDA.hasSponsoringAgentOfResource, project)
}
/**
 * Describes a related record set as a new resource and links it to [resource].
 *
 * The new resource is typed rico:RecordSet and receives the link URI as schema:sameAs.
 * A rico:title tagged with [language] is added only when the link has a non-empty title.
 * The relation is expressed with rico:isRecordResourceAssociatedWithRecordResource.
 *
 * @param resource the record set resource the related set is attached to
 * @param model the model used to create the new resource
 * @param link title and URI of the related record set
 * @param language language tag applied to the title literal
 */
private fun addRelatedRecordSet(resource: Resource, model: Model, link: Link, language: String) {
    val relatedSet = model.createResource()
    relatedSet.addProperty(RDF.type, RICO.RecordSet)

    val title = link.title
    if (title != null && title.isNotEmpty()) {
        relatedSet.addProperty(RICO.title, langLiteral(title, language))
    }

    relatedSet.addProperty(SCHEMA.sameAs, literal(link.uri))
    resource.addProperty(RICO.isRecordResourceAssociatedWithRecordResource, relatedSet)
}
/**
 * Describes a related record as a new resource and links it to [resource] through [property].
 *
 * The new resource is typed rico:Record and receives the link URI as schema:sameAs.
 * A rico:title tagged with [language] is added only when the link has a non-empty title.
 *
 * @param resource the record set resource the record is attached to
 * @param model the model used to create the new resource
 * @param link title and URI of the related record
 * @param language language tag applied to the title literal
 * @param property predicate connecting [resource] to the new record resource
 */
private fun addRelatedRecord(resource: Resource, model: Model, link: Link, language: String, property: Property) {
    val record = model.createResource()
    record.addProperty(RDF.type, RICO.Record)
    link.title?.let { title ->
        // An empty title carries no information, so no literal is written for it.
        if (title.isNotEmpty()) {
            record.addProperty(RICO.title, langLiteral(title, language))
        }
    }
    record.addProperty(SCHEMA.sameAs, literal(link.uri))
    resource.addProperty(property, record)
}
private fun generateLocationResource(model: Model, address: Address): Resource {
val location = model.createResource()
......
......@@ -35,10 +35,10 @@ import org.memobase.model.IdLabels
object Util {
const val languageSourceFilePathPropertyName = "path.languages"
const val memoriavProject = "memoriavProject"
const val memoriavUri = NS.mbcb + "mrv"
val now: String = LocalDateTime.now().format(DateTimeFormatter.ISO_DATE_TIME)
private val wikidataNamespace = "http://www.wikidata.org/entity/"
private val log = LogManager.getLogger("DrupalSyncHelpers")
fun getMunicipalities(): Map<String, IdLabels> {
......
......@@ -21,13 +21,13 @@ data class RecordSet(
/* Publikations Status */
val status: Boolean,
/* 0.1 Titel (Memobase) */
val title: String,
val title_de: String,
val title_fr: String,
val title_it: String,
/* 0.2.1 Thumbnail Searchresult */
val computed_teaser_image_url: String?,
/* 1.1 Inhalt */
val field_content: RichText?,
val field_content_de: RichText?,
val field_content_fr: RichText?,
val field_content_it: RichText?,
/* 1.2 Entstehungszeitraum */
......@@ -37,19 +37,19 @@ data class RecordSet(
val field_language_fr: String?,
val field_language_it: String?,
/* 1.4 Zugang Memobase */
val field_access_memobase: List<RichText>,
val field_access_memobase_fr: List<RichText>,
val field_access_memobase_it: List<RichText>,
val field_access_memobase_de: RichText?,
val field_access_memobase_fr: RichText?,
val field_access_memobase_it: RichText?,
/* 2.1 Kontext */
val field_context: RichText?,
val field_context_de: RichText?,
val field_context_fr: RichText?,
val field_context_it: RichText?,
/* 3.1 Titel (original) */
val field_original_title: String?,
val field_original_title_de: String?,
val field_original_title_fr: String?,
val field_original_title_it: String?,
/* 3.2 Umfang */
val field_scope: RichText?,
val field_scope_de: RichText?,
val field_scope_fr: RichText?,
val field_scope_it: RichText?,
/* 3.3 Auswahl */
......@@ -71,18 +71,18 @@ data class RecordSet(
/* 3.8 Bestandes Signatur */
val field_original_shelf_mark: String?,
/* 3.9 Beschreibung */
val field_text: RichText?,
val field_text_de: RichText?,
val field_text_fr: RichText?,
val field_text_it: RichText?,
/* 3.9 Original Bestandesbeschreibung (Titel) */
/* 3.9.1 Original Bestandesbeschreibung (Link) */
val field_original_description: Link?,
val field_original_description_de: Link?,
val field_original_description_it: Link?,
val field_original_description_fr: Link?,
/* 4.1 Zugang */
val field_access: List<RichText>,
val field_access_fr: List<RichText>,
val field_access_it: List<RichText>,
val field_access_de: RichText?,
val field_access_fr: RichText?,
val field_access_it: RichText?,
/* 4.2 Original Institution */
val field_resp_institution_original: List<String>,
/* 4.3 Master Institution */
......@@ -91,26 +91,26 @@ data class RecordSet(
val field_resp_institution_access: List<String>,
/* 5.1 Projektname */
/* 5.2 Projektbeschreibung (Link) */
val field_project: List<Link>,
val field_project_de: List<Link>,
val field_project_fr: List<Link>,
val field_project_it: List<Link>,
/* 5.3 Verwandte Bestände (Titel) */
/* 5.4 Verwandte Bestände (Link) */
val field_related_record_sets: List<Link>,
val field_related_record_sets_de: List<Link>,
val field_related_record_sets_fr: List<Link>,
val field_related_record_sets_it: List<Link>,
/* 5.5 Publikation (Titel) */
/* 5.6 Publikation (Link) */
val field_publications: List<Link>,
val field_publications_de: List<Link>,
val field_publications_fr: List<Link>,
val field_publications_it: List<Link>,
/* 5.7 Verwandte Dokumente (Titel) */
/* 5.8 Verwandte Dokumente (Link) */
val field_documents: Link?,
val field_documents_de: Link?,
val field_documents_fr: Link?,
val field_documents_it: Link?,
/* 6.1 Datenübernahme */
val field_data_transfer: RichText?,
val field_data_transfer_de: RichText?,
val field_data_transfer_fr: RichText?,
val field_data_transfer_it: RichText?,
/* 6.2 Datum Übernahme (YYYY-MM-DD) */
......
......@@ -42,7 +42,7 @@ class TestRecordSets {
private val log = LogManager.getLogger("TestLogger")
private val regex = Regex("(_:B[A-Za-z0-9]+)")
private val regexTime = Regex("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}")
private val regexTime = Regex("\\d{4}-\\d{2}-\\d{2}[^\"]*")
private fun sort(source: List<String>): String {
......@@ -52,7 +52,7 @@ class TestRecordSets {
replacedString = replacedString.replace(matchResult.groups[0]?.value.orEmpty(), "_:B")
}
for (matchResult in regexTime.findAll(it)) {
replacedString = replacedString.replace(matchResult.groups[0]?.value.orEmpty(), "2020-10-10T09:10:22")
replacedString = replacedString.replace(matchResult.groups[0]?.value.orEmpty(), "2020")
}
replacedString
}.sorted().reduce { acc, s -> acc + "\n" + s }.trim()
......@@ -66,7 +66,7 @@ class TestRecordSets {
@Test
fun `test record set transform`() {
val service = Service("test1.yml")
val input = JSON.parseJson(readFile("input.json"))[0]
val input = JSON.parseJson("testComplete", readFile("completeExample.json"))[0]
val result = RdfTransformer(service.settings.appSettings).createRecordSet(input as RecordSet)
assertAll("",
{
......@@ -85,7 +85,7 @@ class TestRecordSets {
)
testDriver.pipeInput(
factory.create(
service.settings.inputTopic, null, readFile("input.json")
service.settings.inputTopic, "completeExample", readFile("completeExample.json")
)
)
......@@ -110,7 +110,7 @@ class TestRecordSets {
assertAll("",
{
assertThat(sort(value.lines()))
.isEqualTo(sort(readFile("output.nt").lines()))
.isEqualTo(sort(readFile("outputCompleteExample.nt").lines()))
},
{
assertThat(key)
......
<https://memobase.ch/recordSet/testComplete> <http://rdaregistry.info/Elements/u/P60451> <https://memobase.ch/institution/mrv> .
<https://memobase.ch/recordSet/testComplete> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#RecordSet> .
<https://memobase.ch/recordSet/testComplete> <http://www.wikidata.org/prop/direct/P18> "https://mb-wf1.memobase.unibas.ch/sites/default/files/styles/teaser/public/2020-11/StadtArchivSchaffhausenGeb2.jpg?itok=2PsMvPqc" .
<https://memobase.ch/recordSet/testComplete> <https://memobase.ch/internal/isPublished> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#descriptiveNote> "<p>Beschreibung</p>\r\n\r\n<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"@de .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#descriptiveNote> "<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"@fr .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#descriptiveNote> "<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"@it .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#hasLanguage> _:B .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#hasTitle> _:B .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#heldBy> "https://memobase.ch/institution/clg" .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#isAssociatedWithDate> _:B .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#title> "Complete Example"@de .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#title> "Complete Example"@fr .
<https://memobase.ch/recordSet/testComplete> <https://www.ica.org/standards/RiC/ontology#title> "Complete Example"@it .
_:B <http://schema.org/sameAs> "http://www.wikidata.org/entity/Q188" .
_:B <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#DateRange> .
_:B <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Identifier> .
_:B <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Identifier> .
_:B <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Language> .
_:B <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Title> .
_:B <https://www.ica.org/standards/RiC/ontology#identifier> "OLD_MEMOBASE_ID" .
_:B <https://www.ica.org/standards/RiC/ontology#identifier> "testComplete" .
_:B <https://www.ica.org/standards/RiC/ontology#name> "Allemand"@fr .
_:B <https://www.ica.org/standards/RiC/ontology#name> "Deutsch"@de .
_:B <https://www.ica.org/standards/RiC/ontology#name> "Tedesco"@it .
_:B <https://www.ica.org/standards/RiC/ontology#normalizedDateValue> "1920-2020" .
_:B <https://www.ica.org/standards/RiC/ontology#title> "Complete Example"@de .
_:B <https://www.ica.org/standards/RiC/ontology#title> "Complete Example"@fr .
_:B <https://www.ica.org/standards/RiC/ontology#title> "Complete Example"@it .
_:B <https://www.ica.org/standards/RiC/ontology#type> "main" .
_:B <https://www.ica.org/standards/RiC/ontology#type> "main" .
_:B <https://www.ica.org/standards/RiC/ontology#type> "metadata" .
_:B <https://www.ica.org/standards/RiC/ontology#type> "oldMemobase" .
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment