Transform.kt 5.42 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.ModelFactory
Jonas Waeber's avatar
Jonas Waeber committed
6
7
import org.apache.jena.rdf.model.Property
import org.apache.jena.rdf.model.Resource
Jonas Waeber's avatar
Jonas Waeber committed
8
9
10
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import org.apache.kafka.streams.KeyValue
Jonas Waeber's avatar
Jonas Waeber committed
11
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
12
13
import org.memobase.rdf.*
import java.io.StringWriter
Jonas Waeber's avatar
Jonas Waeber committed
14
import kotlin.system.exitProcess
Jonas Waeber's avatar
Jonas Waeber committed
15

Jonas Waeber's avatar
Jonas Waeber committed
16
17
/**
 * Transforms the JSON description of an institution into RDF.
 *
 * Statements are collected in one Jena model by [createInstitution] and serialized by [write].
 *
 * @param municipalities lookup table keyed by postal code; used to enrich addresses with
 * canton, city and coordinates.
 */
class Transform(private val municipalities: Map<String, Municipality>) {
    // Logger registered under the name "Transform".
    private val log = LogManager.getLogger("Transform")
Jonas Waeber's avatar
Jonas Waeber committed
18
19
20
21

    // Jena model that collects every statement created by this instance.
    private val model = ModelFactory.createDefaultModel()
    // URI of the institution resource; stays empty until createInstitution() has assigned it.
    private var uri: String = ""

Jonas Waeber's avatar
Jonas Waeber committed
22
    /**
     * Populates the model with the RDF description of one institution.
     *
     * Creates the institution resource at `NS.memint + id`, attaches a "main" identifier, the
     * language-tagged name and descriptive note, one location per usable entry of
     * `field_addresses`, and the optional simple fields (ISIL, website, e-mail, ...).
     *
     * Terminates the JVM with exit code 1 when `field_memobase_id` is missing or not a string.
     *
     * @param source JSON object holding the institution fields.
     * @param language language tag applied to the name and the descriptive note.
     * @return this instance, so that [write] can be chained.
     */
    fun createInstitution(source: JsonObject, language: String): Transform {
        val id = source["field_memobase_id"] as? String
        if (id == null) {
            log.error("No field memobase id defined.")
            exitProcess(1)
        }
        val resource = model.createResource(NS.memint + id)
        uri = resource.uri

        val identifier = model.createResource()
        identifier.addProperty(RDF.type, RICO.Identifier)
        identifier.addProperty(RICO.type, literal("main"))
        identifier.addProperty(RICO.identifier, literal(id))
        resource.addProperty(RICO.identifiedBy, identifier)

        // TODO: proper multi language integration!
        // Both casts throw when the field is missing or not a string.
        resource.addProperty(RICO.name, langLiteral(source["field_name"] as String, language))
        resource.addProperty(RICO.descriptiveNote, langLiteral(source["field_text"] as String, language))

        // `field_addresses` may be absent or malformed. Type-check the list and every element
        // instead of force-casting, so that a missing field or a single bad entry only produces
        // a warning and the remaining addresses are still processed.
        val fieldAddressValue = source["field_addresses"]
        val castWarning = "Could not cast field_addresses to JsonObject: $fieldAddressValue."
        if (fieldAddressValue is List<*>) {
            for (fieldAddress in fieldAddressValue) {
                if (fieldAddress !is JsonObject) {
                    log.warn(castWarning)
                    continue
                }
                try {
                    extractAddressField(resource, fieldAddress)
                } catch (ex: ClassCastException) {
                    // A field inside the address entry had an unexpected type.
                    log.warn(castWarning)
                }
            }
        } else {
            log.warn(castWarning)
        }

        extractSimpleField(resource, WD.isil, source, "field_isil")
        extractSimpleField(resource, WD.website, source, "field_website")
        extractSimpleField(resource, WD.emailAddress, source, "field_email")
        extractSimpleField(resource, WD.onlineArchive, source, "field_online_archive")
        extractSimpleField(resource, SCHEMA.sameAs, source, "wikidata_id")
        extractSimpleField(resource, WD.image, source, "image")
        extractSimpleField(resource, WD.logo, source, "logo")
        extractSimpleField(resource, WD.typeOfInstitution, source, "instance_of")
        return this
    }

    /**
     * Serializes the model as UTF-8 N-Triples.
     *
     * @return a record whose key is the institution URI and whose value is the trimmed
     * N-Triples text of the model.
     */
    fun write(): KeyValue<String, String> {
        val buffer = StringWriter()
        buffer.use { RDFDataMgr.write(it, model, RDFFormat.NTRIPLES_UTF8) }
        return KeyValue(uri, buffer.toString().trim())
    }

Jonas Waeber's avatar
Jonas Waeber committed
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
    /**
     * Copies the string stored under [fieldName] in [source] onto [resource] as a literal value
     * of [property]. Logs a warning and adds nothing when the field is absent or not a string.
     */
    private fun extractSimpleField(resource: Resource, property: Property, source: JsonObject, fieldName: String) {
        val value = source[fieldName] as? String
        if (value == null) {
            log.warn("No value for $fieldName found in source for institution $uri.")
            return
        }
        resource.addProperty(property, literal(value))
    }

    /**
     * Converts one entry of `field_addresses` into a location blank node and links it to
     * [resource] through `WD.streetAddress`.
     *
     * `address_line1` is split at its last space into street and street number. The trailing
     * token only counts as a street number when it contains a digit; otherwise the whole line is
     * used as the street and no street number is added. Entries without a string
     * `address_line1` or `postal_code` are skipped with a warning.
     *
     * Canton, city and coordinates are only added when the postal code is present in the
     * municipalities lookup table.
     *
     * @param resource institution resource the location is attached to.
     * @param fieldAddress JSON object of a single address entry.
     */
    private fun extractAddressField(resource: Resource, fieldAddress: JsonObject) {
        val streetAddress = (fieldAddress["address_line1"] as? String)?.trim()
        val postalCode = (fieldAddress["postal_code"] as? String)?.trim()
        if (streetAddress == null || postalCode == null) {
            log.warn("Skipping address without address_line1 or postal_code for institution $uri: $fieldAddress")
            return
        }

        val secondAddressLine = fieldAddress["address_line2"] as? String
        val combinedStreetAddress = if (secondAddressLine.isNullOrBlank()) {
            streetAddress
        } else {
            "$streetAddress\n$secondAddressLine"
        }

        // Split at the last space only. Removing the number with a global replace would also
        // delete the same characters wherever else they occur in the street name.
        val trailingToken = streetAddress.substringAfterLast(' ', "")
        val streetNumber = trailingToken.takeIf { token -> token.any { ch -> ch.isDigit() } }
        val street = if (streetNumber != null) {
            streetAddress.substringBeforeLast(' ').trim()
        } else {
            streetAddress
        }

        val municipality = municipalities[postalCode]
        if (municipality == null) {
            // the input validation in drupal should ensure that this never happens.
            log.error("Invalid postal code: $postalCode")
        }

        val location = model.createResource()
        location.addProperty(RDF.type, WD.location)
        location.addProperty(WD.street, literal(street))
        if (streetNumber != null) {
            location.addProperty(WD.streetNumber, literal(streetNumber))
        }
        location.addProperty(WD.streetAddress, literal(combinedStreetAddress))
        location.addProperty(WD.postalCode, literal(postalCode))
        // does not enrich city, canton or coordinates, if the postal code is not in the list.
        if (municipality != null) {
            // canton
            location.addProperty(WD.adminUnit, model.createResource(municipality.canton))
            // city
            location.addProperty(WD.adminUnit, model.createResource(municipality.id))
            // coordinates
            municipality.coordinates.forEach { coordinate ->
                location.addProperty(WD.coordinates, literal(coordinate))
            }
        }
        // The `country_code` field of the entry is not read:
        // country is currently hard coded to switzerland!
        location.addProperty(WD.country, WD.switzerland)
        resource.addProperty(WD.streetAddress, location)
    }
Jonas Waeber's avatar
Jonas Waeber committed
124
125
126
127

    /** Creates a literal from the trimmed [text], tagged with [language]. */
    private fun langLiteral(text: String, language: String): Literal {
        val trimmed = text.trim()
        return model.createLiteral(trimmed, language)
    }
    /** Creates a plain literal from the trimmed [text]. */
    private fun literal(text: String): Literal {
        val trimmed = text.trim()
        return model.createLiteral(trimmed)
    }
}