Commit 6196e898 authored by Jonas Waeber
Browse files

Improves error handling.

parent d3778ad9
...@@ -86,10 +86,10 @@ test { ...@@ -86,10 +86,10 @@ test {
sourceSets { sourceSets {
main.kotlin.srcDirs += 'src/main/kotlin' main.kotlin.srcDirs += 'src/main/kotlin'
main.resources.srcDirs = [ "src/main/resources" ] main.resources.srcDirs = [ "src/main/resources" ]
main.resources.includes = [ "**/*.yml", "**/*.xml"] main.resources.includes = [ "**/*.yml", "**/*.xml", "**/*.tsv"]
test.kotlin.srcDirs += 'src/test/kotlin' test.kotlin.srcDirs += 'src/test/kotlin'
test.resources.srcDirs = [ "src/test/resources" ] test.resources.srcDirs = [ "src/test/resources" ]
test.resources.includes = [ "**/*.yml", "**/*.xml"] test.resources.includes = [ "**/*.yml", "**/*.xml", "**/*.tsv"]
} }
plugins.withType(DistributionPlugin) { plugins.withType(DistributionPlugin) {
......
...@@ -3,12 +3,15 @@ package org.memobase ...@@ -3,12 +3,15 @@ package org.memobase
import com.beust.klaxon.JsonObject import com.beust.klaxon.JsonObject
import org.apache.jena.rdf.model.Literal import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.ModelFactory import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.Property
import org.apache.jena.rdf.model.Resource
import org.apache.jena.riot.RDFDataMgr import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat import org.apache.jena.riot.RDFFormat
import org.apache.kafka.streams.KeyValue import org.apache.kafka.streams.KeyValue
import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.LogManager
import org.memobase.rdf.* import org.memobase.rdf.*
import java.io.StringWriter import java.io.StringWriter
import kotlin.system.exitProcess
class Transform(private val municipalities: Map<String, Municipality>) { class Transform(private val municipalities: Map<String, Municipality>) {
private val log = LogManager.getLogger("Transform") private val log = LogManager.getLogger("Transform")
...@@ -17,73 +20,49 @@ class Transform(private val municipalities: Map<String, Municipality>) { ...@@ -17,73 +20,49 @@ class Transform(private val municipalities: Map<String, Municipality>) {
private var uri: String = "" private var uri: String = ""
fun createInstitution(source: JsonObject): Transform { fun createInstitution(source: JsonObject): Transform {
val resource = model.createResource(NS.memint + source["field_memobase_id"]) val id = source["field_memobase_id"].let {
if (it is String) {
it
} else {
log.error("No field memobase id defined.")
exitProcess(1)
}
}
val resource = model.createResource(NS.memint + id)
uri = resource.uri uri = resource.uri
val identifier = model.createResource() val identifier = model.createResource()
identifier.addProperty(RDF.type, RICO.Identifier) identifier.addProperty(RDF.type, RICO.Identifier)
identifier.addProperty(RICO.type, literal("main")) identifier.addProperty(RICO.type, literal("main"))
identifier.addProperty(RICO.identifier, literal(source["field_memobase_id"] as String)) identifier.addProperty(RICO.identifier, literal(id))
resource.addProperty(RICO.identifiedBy, identifier) resource.addProperty(RICO.identifiedBy, identifier)
resource.addProperty(WD.isil, literal(source["field_isil"] as String)) // TODO: proper multi language integration!
resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, "de")) resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, "de"))
resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, "de")) resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, "de"))
(source["field_addresses"] as List<JsonObject>).forEach { fieldAddress -> source["field_addresses"].let { fieldAddressValue ->
val location = model.createResource() try {
val streetAddress = fieldAddress["address_line1"] as String fieldAddressValue as List<JsonObject>
val secondAddressLine = fieldAddress["address_line2"] as String? fieldAddressValue.forEach { fieldAddress ->
val combinedStreetAddress = if (secondAddressLine != null) { extractAddressField(resource, fieldAddress)
streetAddress + "\n" + secondAddressLine
} else {
streetAddress
}
val streetNumber = streetAddress.substringAfterLast(" ")
val street = streetAddress.replace(streetNumber, "").trim()
val postalCode = (fieldAddress["postal_code"] as String).trim()
val municipality = if (municipalities.containsKey(postalCode)) {
municipalities[postalCode]
} else {
// TODO: This information needs to reach the user!
log.error("Invalid postal code: $postalCode")
null
}
location.addProperty(RDF.type, WD.location)
location.addProperty(WD.street, literal(street))
location.addProperty(WD.streetNumber, literal(streetNumber))
location.addProperty(WD.streetAddress, literal(combinedStreetAddress))
location.addProperty(WD.postalCode, literal(postalCode))
// does not enrich city, canton or cantons, if the postal code is not in the list.
if (municipality != null) {
// canton
location.addProperty(WD.adminUnit, model.createResource(municipality.canton))
// city
location.addProperty(WD.adminUnit, model.createResource(municipality.id))
// coordinates
municipality.coordinates.forEach { coordinate ->
location.addProperty(WD.coordinates, literal(coordinate))
} }
} catch (ex: ClassCastException) {
log.warn("Could not cast field_addresses to JsonObject: $fieldAddressValue.")
} }
//val country = it["country_code"] as String
// country is currently hard coded to switzerland!
location.addProperty(WD.country, WD.switzerland)
resource.addProperty(WD.streetAddress, location)
} }
resource.addProperty(WD.website, literal(source["field_website"] as String)) extractSimpleField(resource, WD.isil, source, "field_isil")
resource.addProperty(WD.emailAddress, literal(source["field_email"] as String)) extractSimpleField(resource, WD.website, source, "field_website")
resource.addProperty(WD.onlineArchive, literal(source["field_online_archive"] as String)) extractSimpleField(resource, WD.emailAddress, source, "field_email")
resource.addProperty(SCHEMA.sameAs, literal(source["wikidata_id"] as String)) extractSimpleField(resource, WD.onlineArchive, source, "field_online_archive")
resource.addProperty(WD.image, literal(source["image"] as String)) extractSimpleField(resource, SCHEMA.sameAs, source, "wikidata_id")
resource.addProperty(WD.logo, literal(source["logo"] as String)) extractSimpleField(resource, WD.image, source, "image")
resource.addProperty(WD.typeOfInstitution, literal(source["instance_of"] as String)) extractSimpleField(resource, WD.logo, source, "logo")
extractSimpleField(resource, WD.typeOfInstitution, source, "instance_of")
return this return this
} }
fun write(): KeyValue<String, String> { fun write(): KeyValue<String, String> {
return StringWriter().use { writer -> return StringWriter().use { writer ->
RDFDataMgr.write(writer, model, RDFFormat.NTRIPLES_UTF8) RDFDataMgr.write(writer, model, RDFFormat.NTRIPLES_UTF8)
...@@ -91,9 +70,58 @@ class Transform(private val municipalities: Map<String, Municipality>) { ...@@ -91,9 +70,58 @@ class Transform(private val municipalities: Map<String, Municipality>) {
} }
} }
/**
 * Copies one string-valued field of [source] onto [resource] as a plain literal.
 *
 * When the entry under [fieldName] is missing or is not a string, nothing is added to
 * [resource] and a warning naming the field and the current institution URI is logged.
 *
 * @param resource the resource that receives the property.
 * @param property the predicate under which the literal is stored.
 * @param source the JSON object the value is read from.
 * @param fieldName the key looked up in [source].
 */
private fun extractSimpleField(resource: Resource, property: Property, source: JsonObject, fieldName: String) {
    // A safe cast folds "key absent" and "value is not a string" into a single null case.
    val value = source[fieldName] as? String
    if (value == null) {
        log.warn("No value for $fieldName found in source for institution $uri.")
        return
    }
    resource.addProperty(property, literal(value))
}
/**
 * Builds a location node from one address object and links it to [resource] via `WD.streetAddress`.
 *
 * The first address line is split at its last space into street and street number. The postal
 * code is looked up in [municipalities]; on a hit the canton, the municipality and all of its
 * coordinates are added to the location. The country is always set to `WD.switzerland`.
 *
 * An address whose `address_line1` or `postal_code` is missing or not a string is logged and
 * skipped, so one malformed entry does not abort the processing of the remaining addresses.
 *
 * @param resource the institution resource the location is attached to.
 * @param fieldAddress a single address object taken from the source JSON.
 */
private fun extractAddressField(resource: Resource, fieldAddress: JsonObject) {
    // Safe casts instead of hard casts: a missing or non-string value must not throw from here.
    val streetAddress = fieldAddress["address_line1"] as? String
    val rawPostalCode = fieldAddress["postal_code"] as? String
    if (streetAddress == null || rawPostalCode == null) {
        log.warn("Skipping address without address_line1 or postal_code for institution $uri: $fieldAddress.")
        return
    }
    val secondAddressLine = fieldAddress["address_line2"] as? String
    val combinedStreetAddress = if (secondAddressLine != null) {
        streetAddress + "\n" + secondAddressLine
    } else {
        streetAddress
    }

    // Split on the LAST space only. Removing the number with replace() would also delete
    // every earlier occurrence of the same text inside the street name.
    val firstLine = streetAddress.trim()
    val hasStreetNumber = firstLine.contains(' ')
    val street = if (hasStreetNumber) firstLine.substringBeforeLast(' ').trim() else firstLine
    // Without any space there is no separate number token; the whole line is the street.
    val streetNumber = if (hasStreetNumber) firstLine.substringAfterLast(' ') else null

    val postalCode = rawPostalCode.trim()
    val municipality = municipalities[postalCode]
    if (municipality == null) {
        // the input validation in drupal should ensure that this never happens.
        log.error("Invalid postal code: $postalCode")
    }

    val location = model.createResource()
    location.addProperty(RDF.type, WD.location)
    location.addProperty(WD.street, literal(street))
    if (streetNumber != null) {
        location.addProperty(WD.streetNumber, literal(streetNumber))
    }
    location.addProperty(WD.streetAddress, literal(combinedStreetAddress))
    location.addProperty(WD.postalCode, literal(postalCode))

    // does not enrich city, canton or coordinates, if the postal code is not in the list.
    if (municipality != null) {
        // canton
        location.addProperty(WD.adminUnit, model.createResource(municipality.canton))
        // city
        location.addProperty(WD.adminUnit, model.createResource(municipality.id))
        // coordinates
        municipality.coordinates.forEach { coordinate ->
            location.addProperty(WD.coordinates, literal(coordinate))
        }
    }

    // country is currently hard coded to switzerland; the "country_code" of the address is not read.
    location.addProperty(WD.country, WD.switzerland)
    resource.addProperty(WD.streetAddress, location)
}
private fun langLiteral(text: String, language: String): Literal = model.createLiteral(text.trim(), language) private fun langLiteral(text: String, language: String): Literal = model.createLiteral(text.trim(), language)
private fun literal(text: String): Literal = model.createLiteral(text.trim()) private fun literal(text: String): Literal = model.createLiteral(text.trim())
} }
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment