From e546e960782b3c7d9cc1f8a4c80c27b432791907 Mon Sep 17 00:00:00 2001 From: Jonas Waeber Date: Tue, 30 Jun 2020 13:26:29 +0200 Subject: [PATCH] Change loader & transform - refactors loader in separate object. - adds test to loader. - change transform to accept language tag as param --- src/main/kotlin/KafkaTopology.kt | 36 +----- src/main/kotlin/MunicipalitiesLoader.kt | 36 ++++++ src/main/kotlin/Transform.kt | 6 +- src/test/kotlin/Test.kt | 16 ++- src/test/resources/data/1/input.json | 156 ++---------------------- src/test/resources/data/2/input.json | 153 +---------------------- 6 files changed, 64 insertions(+), 339 deletions(-) create mode 100644 src/main/kotlin/MunicipalitiesLoader.kt diff --git a/src/main/kotlin/KafkaTopology.kt b/src/main/kotlin/KafkaTopology.kt index 33359ad..a96aa7c 100644 --- a/src/main/kotlin/KafkaTopology.kt +++ b/src/main/kotlin/KafkaTopology.kt @@ -26,45 +26,19 @@ import org.apache.kafka.streams.Topology import org.apache.logging.log4j.LogManager import org.memobase.settings.SettingsLoader import java.io.StringReader -import kotlin.system.exitProcess class KafkaTopology(private val settings: SettingsLoader) { private val log = LogManager.getLogger("StreamsProcessing") - private val municipalities = getMunicipalities() - - private fun getMunicipalities(): Map { - val stream = ClassLoader.getSystemResourceAsStream("municipalities.tsv") - if (stream != null) { - return stream.bufferedReader().lineSequence().map { - val values = it.split("\t") - Municipality( - values[1].trim('"').split(", "), - values[2].trim('<', '>'), - values[0].trim('<', '>'), - values[3].replace("@de", "").trim('"'), - values[4].replace("@fr", "").trim('"'), - values[5].replace("@it", "").trim('"'), - values[6].split(",") - ) - }.map { municipality -> - municipality.postalCodes.map { code -> - Pair(code, municipality) - } - }.flatten().toMap() - } else { - log.error("Could not load municipalities.tsv from classpath!") - exitProcess(1) - } - } - + private val municipalities = MunicipalitiesLoader.getMunicipalities() fun build(): Topology { val builder = StreamsBuilder() val stream = builder.stream(settings.inputTopic) stream .flatMapValues { value -> parseJson(value) } - .mapValues { value -> transformJson(value) } + .flatMapValues { value -> value.keys.map { key -> Pair(value[key] as JsonObject, key) } } + .mapValues { value -> transformJson(value.first, value.second) } .map { _, value -> value.write() } .to(settings.outputTopic) @@ -80,7 +54,7 @@ class KafkaTopology(private val settings: SettingsLoader) { } } - private fun transformJson(input: JsonObject): Transform { - return Transform(municipalities).createInstitution(input) + private fun transformJson(input: JsonObject, language: String): Transform { + return Transform(municipalities).createInstitution(input, language) } } diff --git a/src/main/kotlin/MunicipalitiesLoader.kt b/src/main/kotlin/MunicipalitiesLoader.kt new file mode 100644 index 0000000..654edaf --- /dev/null +++ b/src/main/kotlin/MunicipalitiesLoader.kt @@ -0,0 +1,36 @@ +package org.memobase + +import org.apache.logging.log4j.LogManager +import kotlin.system.exitProcess + +object MunicipalitiesLoader { + + private val log = LogManager.getLogger("MunicipalitiesLoader") + + fun getMunicipalities(): Map { + val stream = ClassLoader.getSystemResourceAsStream("municipalities.tsv") + if (stream != null) { + return stream.bufferedReader().lineSequence().filterNot { + it.startsWith("item") + }.map { + val values = it.split("\t") + Municipality( + values[1].trim('"').split(", "), + values[2].trim('<', '>'), + values[0].trim('<', '>'), + values[3].replace("@de", "").trim('"'), + values[4].replace("@fr", "").trim('"'), + values[5].replace("@it", "").trim('"'), + values[6].split(",") + ) + }.map { municipality -> + municipality.postalCodes.map { code -> + Pair(code, municipality) + } + }.flatten().toMap() + } else { + log.error("Could not load municipalities.tsv from classpath!") + exitProcess(1) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/Transform.kt b/src/main/kotlin/Transform.kt index b4454b0..0881f10 100644 --- a/src/main/kotlin/Transform.kt +++ b/src/main/kotlin/Transform.kt @@ -19,7 +19,7 @@ class Transform(private val municipalities: Map) { private val model = ModelFactory.createDefaultModel() private var uri: String = "" - fun createInstitution(source: JsonObject): Transform { + fun createInstitution(source: JsonObject, language: String): Transform { val id = source["field_memobase_id"].let { if (it is String) { it @@ -38,8 +38,8 @@ class Transform(private val municipalities: Map) { resource.addProperty(RICO.identifiedBy, identifier) // TODO: proper multi language integration! - resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, "de")) - resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, "de")) + resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, language)) + resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, language)) source["field_addresses"].let { fieldAddressValue -> try { diff --git a/src/test/kotlin/Test.kt b/src/test/kotlin/Test.kt index f0cd1db..c218785 100644 --- a/src/test/kotlin/Test.kt +++ b/src/test/kotlin/Test.kt @@ -17,10 +17,6 @@ */ package org.memobase -import com.beust.klaxon.Klaxon -import java.io.File -import java.nio.charset.Charset -import java.util.stream.Stream import org.apache.kafka.common.serialization.StringDeserializer import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.streams.TopologyTestDriver @@ -29,8 +25,8 @@ import org.apache.logging.log4j.LogManager import org.assertj.core.api.Assertions.assertThat import org.junit.jupiter.api.Test import org.junit.jupiter.api.TestInstance -import org.junit.jupiter.params.ParameterizedTest -import org.junit.jupiter.params.provider.MethodSource +import java.io.File +import java.nio.charset.Charset @TestInstance(TestInstance.Lifecycle.PER_CLASS) class Test { @@ -41,6 +37,14 @@ class Test { return File("$resourcePath/$fileName").readText(Charset.defaultCharset()) } + @Test + fun `test municipalities loader`() { + val result = MunicipalitiesLoader.getMunicipalities() + assertThat(result) + .isNotNull + .isNotEmpty + } + @Test fun `test institution transform`() { val service = Service("test1.yml") diff --git a/src/test/resources/data/1/input.json b/src/test/resources/data/1/input.json index 014f734..4b5168b 100644 --- a/src/test/resources/data/1/input.json +++ b/src/test/resources/data/1/input.json @@ -1,150 +1,12 @@ { - "@context": { - "dct": "http://purl.org/dc/terms/", - "ebucore": "http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#", - "rdau": "http://rdaregistry.info/Elements/u/", - "rico": "https://www.ica.org/standards/RiC/ontology#" + "de": { + "field_memobase_id": "test", + "field_isil": "12345" }, - "@graph": [ - { - "@id": "https://memobase.ch/record/BAZ-MEI_49884", - "@type": "rico:Record", - "dct:created": { - "@id": "_:N75a34824fdfe472aa69225441c3de195" - }, - "dct:relation": "Bezug Findmittel: Auftragsregister Bd. 6; Bildverzeichnis Bd. 7", - "rdau:P60451": "Memoriav", - "rdau:P60556": { - "@id": "_:Nc0062ea95b3d4334a4ad69d1008febb7" - }, - "rico:descriptiveNote": { - "@language": "de", - "@value": "Villa mit Garten und Brunnen im Vordergrund. Vermutlich von Architekt Walz" - }, - "rico:hasInstantiation": [ - { - "@id": "https://memobase.ch/instantiation/physical/BAZ-MEI_49884-0" - }, - { - "@id": "https://memobase.ch/instantiation/digital/BAZ-MEI_49884-1" - } - ], - "rico:hasTitle": { - "@id": "_:N35f354604b1745d59b1ad63a426f6c6a" - }, - "rico:heldBy": { - "@id": "https://memobase.ch/institution/BAZ" - }, - "rico:identifiedBy": [ - { - "@id": "_:Nb9da0f23b09a47e3b9ad346ff8c1d586" - }, - { - "@id": "_:Nb3242bd2e2af41cbb45e1e9a8361b0d5" - } - ], - "rico:isPartOf": { - "@id": "https://memobase.ch/recordSet/BAZ-B_MEI" - }, - "rico:recordResourceOrInstantiationIsSourceOfCreationRelation": [ - { - "@id": "_:Na2fdeb4326984672a63ae7ea468e9d9b" - }, - { - "@id": "_:Nec9dfa5ba2704cd291cee38aa018e05f" - } - ], - "rico:regulatedBy": { - "@id": "_:Nfe03ee672afa4223921c79464904b674" - }, - "rico:title": { - "@language": "de", - "@value": "«Villa Siegel», Zürich" - }, - "rico:type": "Foto" - }, - { - "@id": "_:Nfe03ee672afa4223921c79464904b674", - "@type": "rico:Rule", - "rico:name": "BAZ", - "rico:regulates": { - "@id": "https://memobase.ch/record/BAZ-MEI_49884" - }, - "rico:type": "holder" - }, - { - "@id": "_:N75a34824fdfe472aa69225441c3de195", - "@type": "rico:SingleDate", - "rico:normalizedDateValue": "1921-09-14" - }, - { - "@id": "_:Nc0062ea95b3d4334a4ad69d1008febb7", - "@type": "rico:Place", - "rico:name": { - "@language": "de", - "@value": "Zürich" - } - }, - { - "@id": "_:Nb9da0f23b09a47e3b9ad346ff8c1d586", - "@type": "rico:Identifier", - "rico:identifier": "MEI_49884", - "rico:type": "callNumber" - }, - { - "@id": "_:N35f354604b1745d59b1ad63a426f6c6a", - "@type": "rico:Title", - "rico:title": { - "@language": "de", - "@value": "«Villa Siegel», Zürich" - }, - "rico:type": "main" - }, - { - "@id": "_:Nb3242bd2e2af41cbb45e1e9a8361b0d5", - "@type": "rico:Identifier", - "rico:identifier": "https://memobase.ch/record/BAZ-MEI_49884", - "rico:type": "main" - }, - { - "@id": "_:Na2fdeb4326984672a63ae7ea468e9d9b", - "@type": "rico:CreationRelation", - "rico:creationRelationHasSource": { - "@id": "https://memobase.ch/record/BAZ-MEI_49884" - }, - "rico:creationRelationHasTarget": { - "@id": "_:Nea2f27453d394da9bf5ea68ddf5efac7" - }, - "rico:type": "Contributor", - "rico:name": "Auftraggeber" - }, - { - "@id": "_:Nea2f27453d394da9bf5ea68ddf5efac7", - "@type": "rico:Agent", - "rico:name": { - "@language": "de", - "@value": "Walz" - } - }, - { - "@id": "_:Nec9dfa5ba2704cd291cee38aa018e05f", - "@type": "rico:CreationRelation", - "rico:creationRelationHasSource": { - "@id": "https://memobase.ch/record/BAZ-MEI_49884" - }, - "rico:creationRelationHasTarget": { - "@id": "_:Nd06faa8500ae42c3b8b6e3e5ed59551b" - }, - "rico:type": "Creator", - "rico:name": "Fotograf" - }, - { - "@id": "_:Nd06faa8500ae42c3b8b6e3e5ed59551b", - "@type": "rico:CorporateBody", - "rico:name": { - "@language": "de", - "@value": "Atelier Meiner" - } - } - ] + "fr": { + "field_memobase_id": "test" + }, + "it": { + "field_memobase_id": "test" + } } \ No newline at end of file diff --git a/src/test/resources/data/2/input.json b/src/test/resources/data/2/input.json index 8096140..ad21c9d 100644 --- a/src/test/resources/data/2/input.json +++ b/src/test/resources/data/2/input.json @@ -1,154 +1,3 @@ { - "@context": { - "dct": "http://purl.org/dc/terms/", - "ebucore": "http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#", - "rdau": "http://rdaregistry.info/Elements/u/", - "rico": "https://www.ica.org/standards/RiC/ontology#" - }, - "@graph": [ - { - "@id": "https://memobase.ch/record/BAZ-MEI_49885", - "@type": "rico:Record", - "dct:created": { - "@id": "_:Na2432cc9e994416db79302cb09fdc636" - }, - "dct:relation": "Bezug Findmittel: Auftragsregister Bd. 6; Bildverzeichnis Bd. 7", - "rdau:P60451": "Memoriav", - "rdau:P60556": { - "@id": "_:Na043e5114f104d5da1d94f7d24eca6ab" - }, - "rico:descriptiveNote": [{ - "@language": "de", - "@value": "Villa hinter Hecke und Bäumen, von der Strasse aus fotografiert. Vermutlich von Architekt Walz" - },{ - "@language": "fr", - "@value": "Villa hinter Hecke und Bäumen, von der Strasse aus fotografiert. Vermutlich von Architekt Walz" - }], - "rico:hasInstantiation": [ - { - "@id": "https://memobase.ch/instantiation/physical/BAZ-MEI_49885-0" - }, - { - "@id": "https://memobase.ch/instantiation/digital/BAZ-MEI_49885-1" - } - ], - "rico:hasTitle": { - "@id": "_:N78bd09b5b58e40b788cb8c8720a77db0" - }, - "rico:heldBy": { - "@id": "https://memobase.ch/institution/BAZ" - }, - "rico:identifiedBy": [ - { - "@id": "_:Nfdd79cb4498a47b89e203ac0be08ee93" - }, - { - "@id": "_:N19dd1c0299d546769a039069636929c5" - } - ], - "rico:isPartOf": { - "@id": "https://memobase.ch/recordSet/BAZ-B_MEI" - }, - "rico:recordResourceOrInstantiationIsSourceOfCreationRelation": [ - { - "@id": "_:N3ccb5e23410b4af298e7362c346d20cc" - }, - { - "@id": "_:Nf282ecd7137244e0a7fa085f5b97c49f" - } - ], - "rico:regulatedBy": { - "@id": "_:N30ede98ab3874503b52d7929ade0815d" - }, - "rico:title": - { - "@language": "de", - "@value": "«Villa Siegel», Zürich" - }, - "rico:type": "Foto" - }, - { - "@id": "_:Nfdd79cb4498a47b89e203ac0be08ee93", - "@type": "rico:Identifier", - "rico:identifier": "MEI_49885", - "rico:type": "original" - }, - { - "@id": "_:N78bd09b5b58e40b788cb8c8720a77db0", - "@type": "rico:Title", - "rico:title": { - "@language": "de", - "@value": "«Villa Siegel», Zürich" - }, - "rico:type": "main" - }, - { - "@id": "_:N19dd1c0299d546769a039069636929c5", - "@type": "rico:Identifier", - "rico:identifier": "https://memobase.ch/record/BAZ-MEI_49885", - "rico:type": "main" - }, - { - "@id": "_:Na2432cc9e994416db79302cb09fdc636", - "@type": "rico:DateSet", - "rico:expressedDate": "19210914" - }, - { - "@id": "_:N30ede98ab3874503b52d7929ade0815d", - "@type": "rico:Rule", - "rico:name": "BAZ", - "rico:regulates": { - "@id": "https://memobase.ch/record/BAZ-MEI_49885" - }, - "rico:type": "holder" - }, - { - "@id": "_:N3ccb5e23410b4af298e7362c346d20cc", - "@type": "rico:CreationRelation", - "rico:creationRelationHasSource": { - "@id": "https://memobase.ch/record/BAZ-MEI_49885" - }, - "rico:creationRelationHasTarget": { - "@id": "_:N019a117ac0044c108f09b61478183e0a" - }, - "rico:type": "Creator", - "rico:name": "Fotograf" - }, - { - "@id": "_:N019a117ac0044c108f09b61478183e0a", - "@type": "rico:CorporateBody", - "rico:name": { - "@language": "de", - "@value": "Atelier Meiner" - } - }, - { - "@id": "_:Nf282ecd7137244e0a7fa085f5b97c49f", - "@type": "rico:CreationRelation", - "rico:creationRelationHasSource": { - "@id": "https://memobase.ch/record/BAZ-MEI_49885" - }, - "rico:creationRelationHasTarget": { - "@id": "_:N208a92b667c9424a89efdd6d5516e815" - }, - "rico:type": "Contributor", - "rico:name": "Auftraggeber" - }, - { - "@id": "_:N208a92b667c9424a89efdd6d5516e815", - "@type": "rico:Agent", - "rico:name": { - "@language": "de", - "@value": "Walz" - } - }, - { - "@id": "_:Na043e5114f104d5da1d94f7d24eca6ab", - "@type": "rico:Place", - "rico:name": { - "@language": "de", - "@value": "Zürich" - } - } - ] + "field_memobase_id": "test" } \ No newline at end of file -- GitLab