Commit e546e960 authored by Jonas Waeber's avatar Jonas Waeber

Change loader & transform

- refactors loader into a separate object.
- adds test for loader.
- changes transform to accept language tag as param.
parent 55bbe5f1
......@@ -26,45 +26,19 @@ import org.apache.kafka.streams.Topology
import org.apache.logging.log4j.LogManager
import org.memobase.settings.SettingsLoader
import java.io.StringReader
import kotlin.system.exitProcess
class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("StreamsProcessing")
private val municipalities = getMunicipalities()
private fun getMunicipalities(): Map<String, Municipality> {
val stream = ClassLoader.getSystemResourceAsStream("municipalities.tsv")
if (stream != null) {
return stream.bufferedReader().lineSequence().map {
val values = it.split("\t")
Municipality(
values[1].trim('"').split(", "),
values[2].trim('<', '>'),
values[0].trim('<', '>'),
values[3].replace("@de", "").trim('"'),
values[4].replace("@fr", "").trim('"'),
values[5].replace("@it", "").trim('"'),
values[6].split(",")
)
}.map { municipality ->
municipality.postalCodes.map { code ->
Pair(code, municipality)
}
}.flatten().toMap()
} else {
log.error("Could not load municipalities.tsv from classpath!")
exitProcess(1)
}
}
private val municipalities = MunicipalitiesLoader.getMunicipalities()
fun build(): Topology {
val builder = StreamsBuilder()
val stream = builder.stream<String, String>(settings.inputTopic)
stream
.flatMapValues { value -> parseJson(value) }
.mapValues { value -> transformJson(value) }
.flatMapValues { value -> value.keys.map { key -> Pair(value[key] as JsonObject, key) } }
.mapValues { value -> transformJson(value.first, value.second) }
.map { _, value -> value.write() }
.to(settings.outputTopic)
......@@ -80,7 +54,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
}
}
private fun transformJson(input: JsonObject): Transform {
return Transform(municipalities).createInstitution(input)
private fun transformJson(input: JsonObject, language: String): Transform {
return Transform(municipalities).createInstitution(input, language)
}
}
package org.memobase
import org.apache.logging.log4j.LogManager
import kotlin.system.exitProcess
/**
 * Loads the municipality lookup table from the `municipalities.tsv` classpath resource.
 */
object MunicipalitiesLoader {
    private val log = LogManager.getLogger("MunicipalitiesLoader")

    /**
     * Reads `municipalities.tsv` from the system classpath and indexes every
     * [Municipality] by each of its postal codes.
     *
     * Lines starting with `item` (presumably the header row -- confirm against the
     * resource file) and blank lines are skipped. If the same postal code occurs for
     * several municipalities, the one read last is kept.
     *
     * If the resource cannot be found, an error is logged and the process exits with
     * status 1.
     *
     * @return map from postal code to the municipality that lists it.
     */
    fun getMunicipalities(): Map<String, Municipality> {
        val stream = ClassLoader.getSystemResourceAsStream("municipalities.tsv")
        if (stream == null) {
            log.error("Could not load municipalities.tsv from classpath!")
            exitProcess(1)
        }
        // useLines closes the reader (and the underlying stream) once the lazy
        // sequence has been fully consumed by toMap().
        return stream.bufferedReader().useLines { lines ->
            lines
                // A blank line would otherwise fail with an index error in parseLine.
                .filter { line -> line.isNotBlank() }
                .filterNot { line -> line.startsWith("item") }
                .map { line -> parseLine(line) }
                .flatMap { municipality ->
                    municipality.postalCodes.asSequence().map { code -> Pair(code, municipality) }
                }
                .toMap()
        }
    }

    /** Builds a [Municipality] from one tab-separated line of the resource file. */
    private fun parseLine(line: String): Municipality {
        val values = line.split("\t")
        return Municipality(
            values[1].trim('"').split(", "),
            values[2].trim('<', '>'),
            values[0].trim('<', '>'),
            values[3].replace("@de", "").trim('"'),
            values[4].replace("@fr", "").trim('"'),
            values[5].replace("@it", "").trim('"'),
            values[6].split(",")
        )
    }
}
\ No newline at end of file
......@@ -19,7 +19,7 @@ class Transform(private val municipalities: Map<String, Municipality>) {
private val model = ModelFactory.createDefaultModel()
private var uri: String = ""
fun createInstitution(source: JsonObject): Transform {
fun createInstitution(source: JsonObject, language: String): Transform {
val id = source["field_memobase_id"].let {
if (it is String) {
it
......@@ -38,8 +38,8 @@ class Transform(private val municipalities: Map<String, Municipality>) {
resource.addProperty(RICO.identifiedBy, identifier)
// TODO: proper multi language integration!
resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, "de"))
resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, "de"))
resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, language))
resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, language))
source["field_addresses"].let { fieldAddressValue ->
try {
......
......@@ -17,10 +17,6 @@
*/
package org.memobase
import com.beust.klaxon.Klaxon
import java.io.File
import java.nio.charset.Charset
import java.util.stream.Stream
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.kafka.streams.TopologyTestDriver
......@@ -29,8 +25,8 @@ import org.apache.logging.log4j.LogManager
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import java.io.File
import java.nio.charset.Charset
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class Test {
......@@ -41,6 +37,14 @@ class Test {
return File("$resourcePath/$fileName").readText(Charset.defaultCharset())
}
/** Verifies that the loader yields a non-null, non-empty municipality map. */
@Test
fun `test municipalities loader`() {
    val municipalitiesByCode = MunicipalitiesLoader.getMunicipalities()

    assertThat(municipalitiesByCode).isNotNull.isNotEmpty
}
@Test
fun `test institution transform`() {
val service = Service("test1.yml")
......
{
"@context": {
"dct": "http://purl.org/dc/terms/",
"ebucore": "http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#",
"rdau": "http://rdaregistry.info/Elements/u/",
"rico": "https://www.ica.org/standards/RiC/ontology#"
"de": {
"field_memobase_id": "test",
"field_isil": "12345"
},
"@graph": [
{
"@id": "https://memobase.ch/record/BAZ-MEI_49884",
"@type": "rico:Record",
"dct:created": {
"@id": "_:N75a34824fdfe472aa69225441c3de195"
},
"dct:relation": "Bezug Findmittel: Auftragsregister Bd. 6; Bildverzeichnis Bd. 7",
"rdau:P60451": "Memoriav",
"rdau:P60556": {
"@id": "_:Nc0062ea95b3d4334a4ad69d1008febb7"
},
"rico:descriptiveNote": {
"@language": "de",
"@value": "Villa mit Garten und Brunnen im Vordergrund. Vermutlich von Architekt Walz"
},
"rico:hasInstantiation": [
{
"@id": "https://memobase.ch/instantiation/physical/BAZ-MEI_49884-0"
},
{
"@id": "https://memobase.ch/instantiation/digital/BAZ-MEI_49884-1"
}
],
"rico:hasTitle": {
"@id": "_:N35f354604b1745d59b1ad63a426f6c6a"
},
"rico:heldBy": {
"@id": "https://memobase.ch/institution/BAZ"
},
"rico:identifiedBy": [
{
"@id": "_:Nb9da0f23b09a47e3b9ad346ff8c1d586"
},
{
"@id": "_:Nb3242bd2e2af41cbb45e1e9a8361b0d5"
}
],
"rico:isPartOf": {
"@id": "https://memobase.ch/recordSet/BAZ-B_MEI"
},
"rico:recordResourceOrInstantiationIsSourceOfCreationRelation": [
{
"@id": "_:Na2fdeb4326984672a63ae7ea468e9d9b"
},
{
"@id": "_:Nec9dfa5ba2704cd291cee38aa018e05f"
}
],
"rico:regulatedBy": {
"@id": "_:Nfe03ee672afa4223921c79464904b674"
},
"rico:title": {
"@language": "de",
"@value": "«Villa Siegel», Zürich"
},
"rico:type": "Foto"
},
{
"@id": "_:Nfe03ee672afa4223921c79464904b674",
"@type": "rico:Rule",
"rico:name": "BAZ",
"rico:regulates": {
"@id": "https://memobase.ch/record/BAZ-MEI_49884"
},
"rico:type": "holder"
},
{
"@id": "_:N75a34824fdfe472aa69225441c3de195",
"@type": "rico:SingleDate",
"rico:normalizedDateValue": "1921-09-14"
},
{
"@id": "_:Nc0062ea95b3d4334a4ad69d1008febb7",
"@type": "rico:Place",
"rico:name": {
"@language": "de",
"@value": "Zürich"
}
},
{
"@id": "_:Nb9da0f23b09a47e3b9ad346ff8c1d586",
"@type": "rico:Identifier",
"rico:identifier": "MEI_49884",
"rico:type": "callNumber"
},
{
"@id": "_:N35f354604b1745d59b1ad63a426f6c6a",
"@type": "rico:Title",
"rico:title": {
"@language": "de",
"@value": "«Villa Siegel», Zürich"
},
"rico:type": "main"
},
{
"@id": "_:Nb3242bd2e2af41cbb45e1e9a8361b0d5",
"@type": "rico:Identifier",
"rico:identifier": "https://memobase.ch/record/BAZ-MEI_49884",
"rico:type": "main"
},
{
"@id": "_:Na2fdeb4326984672a63ae7ea468e9d9b",
"@type": "rico:CreationRelation",
"rico:creationRelationHasSource": {
"@id": "https://memobase.ch/record/BAZ-MEI_49884"
},
"rico:creationRelationHasTarget": {
"@id": "_:Nea2f27453d394da9bf5ea68ddf5efac7"
},
"rico:type": "Contributor",
"rico:name": "Auftraggeber"
},
{
"@id": "_:Nea2f27453d394da9bf5ea68ddf5efac7",
"@type": "rico:Agent",
"rico:name": {
"@language": "de",
"@value": "Walz"
}
},
{
"@id": "_:Nec9dfa5ba2704cd291cee38aa018e05f",
"@type": "rico:CreationRelation",
"rico:creationRelationHasSource": {
"@id": "https://memobase.ch/record/BAZ-MEI_49884"
},
"rico:creationRelationHasTarget": {
"@id": "_:Nd06faa8500ae42c3b8b6e3e5ed59551b"
},
"rico:type": "Creator",
"rico:name": "Fotograf"
},
{
"@id": "_:Nd06faa8500ae42c3b8b6e3e5ed59551b",
"@type": "rico:CorporateBody",
"rico:name": {
"@language": "de",
"@value": "Atelier Meiner"
}
}
]
"fr": {
"field_memobase_id": "test"
},
"it": {
"field_memobase_id": "test"
}
}
\ No newline at end of file
{
"@context": {
"dct": "http://purl.org/dc/terms/",
"ebucore": "http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#",
"rdau": "http://rdaregistry.info/Elements/u/",
"rico": "https://www.ica.org/standards/RiC/ontology#"
},
"@graph": [
{
"@id": "https://memobase.ch/record/BAZ-MEI_49885",
"@type": "rico:Record",
"dct:created": {
"@id": "_:Na2432cc9e994416db79302cb09fdc636"
},
"dct:relation": "Bezug Findmittel: Auftragsregister Bd. 6; Bildverzeichnis Bd. 7",
"rdau:P60451": "Memoriav",
"rdau:P60556": {
"@id": "_:Na043e5114f104d5da1d94f7d24eca6ab"
},
"rico:descriptiveNote": [{
"@language": "de",
"@value": "Villa hinter Hecke und Bäumen, von der Strasse aus fotografiert. Vermutlich von Architekt Walz"
},{
"@language": "fr",
"@value": "Villa hinter Hecke und Bäumen, von der Strasse aus fotografiert. Vermutlich von Architekt Walz"
}],
"rico:hasInstantiation": [
{
"@id": "https://memobase.ch/instantiation/physical/BAZ-MEI_49885-0"
},
{
"@id": "https://memobase.ch/instantiation/digital/BAZ-MEI_49885-1"
}
],
"rico:hasTitle": {
"@id": "_:N78bd09b5b58e40b788cb8c8720a77db0"
},
"rico:heldBy": {
"@id": "https://memobase.ch/institution/BAZ"
},
"rico:identifiedBy": [
{
"@id": "_:Nfdd79cb4498a47b89e203ac0be08ee93"
},
{
"@id": "_:N19dd1c0299d546769a039069636929c5"
}
],
"rico:isPartOf": {
"@id": "https://memobase.ch/recordSet/BAZ-B_MEI"
},
"rico:recordResourceOrInstantiationIsSourceOfCreationRelation": [
{
"@id": "_:N3ccb5e23410b4af298e7362c346d20cc"
},
{
"@id": "_:Nf282ecd7137244e0a7fa085f5b97c49f"
}
],
"rico:regulatedBy": {
"@id": "_:N30ede98ab3874503b52d7929ade0815d"
},
"rico:title":
{
"@language": "de",
"@value": "«Villa Siegel», Zürich"
},
"rico:type": "Foto"
},
{
"@id": "_:Nfdd79cb4498a47b89e203ac0be08ee93",
"@type": "rico:Identifier",
"rico:identifier": "MEI_49885",
"rico:type": "original"
},
{
"@id": "_:N78bd09b5b58e40b788cb8c8720a77db0",
"@type": "rico:Title",
"rico:title": {
"@language": "de",
"@value": "«Villa Siegel», Zürich"
},
"rico:type": "main"
},
{
"@id": "_:N19dd1c0299d546769a039069636929c5",
"@type": "rico:Identifier",
"rico:identifier": "https://memobase.ch/record/BAZ-MEI_49885",
"rico:type": "main"
},
{
"@id": "_:Na2432cc9e994416db79302cb09fdc636",
"@type": "rico:DateSet",
"rico:expressedDate": "19210914"
},
{
"@id": "_:N30ede98ab3874503b52d7929ade0815d",
"@type": "rico:Rule",
"rico:name": "BAZ",
"rico:regulates": {
"@id": "https://memobase.ch/record/BAZ-MEI_49885"
},
"rico:type": "holder"
},
{
"@id": "_:N3ccb5e23410b4af298e7362c346d20cc",
"@type": "rico:CreationRelation",
"rico:creationRelationHasSource": {
"@id": "https://memobase.ch/record/BAZ-MEI_49885"
},
"rico:creationRelationHasTarget": {
"@id": "_:N019a117ac0044c108f09b61478183e0a"
},
"rico:type": "Creator",
"rico:name": "Fotograf"
},
{
"@id": "_:N019a117ac0044c108f09b61478183e0a",
"@type": "rico:CorporateBody",
"rico:name": {
"@language": "de",
"@value": "Atelier Meiner"
}
},
{
"@id": "_:Nf282ecd7137244e0a7fa085f5b97c49f",
"@type": "rico:CreationRelation",
"rico:creationRelationHasSource": {
"@id": "https://memobase.ch/record/BAZ-MEI_49885"
},
"rico:creationRelationHasTarget": {
"@id": "_:N208a92b667c9424a89efdd6d5516e815"
},
"rico:type": "Contributor",
"rico:name": "Auftraggeber"
},
{
"@id": "_:N208a92b667c9424a89efdd6d5516e815",
"@type": "rico:Agent",
"rico:name": {
"@language": "de",
"@value": "Walz"
}
},
{
"@id": "_:Na043e5114f104d5da1d94f7d24eca6ab",
"@type": "rico:Place",
"rico:name": {
"@language": "de",
"@value": "Zürich"
}
}
]
"field_memobase_id": "test"
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment