Transform.kt 4.94 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
package org.memobase

import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.ModelFactory
Jonas Waeber's avatar
Jonas Waeber committed
5
import org.apache.jena.rdf.model.Resource
Jonas Waeber's avatar
Jonas Waeber committed
6
7
8
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import org.apache.kafka.streams.KeyValue
Jonas Waeber's avatar
Jonas Waeber committed
9
import org.apache.logging.log4j.LogManager
10
11
12
13
14
15
16
import org.memobase.model.MergedAddress
import org.memobase.model.MergedInstitution
import org.memobase.model.Municipality
import org.memobase.rdf.NS
import org.memobase.rdf.RDF
import org.memobase.rdf.RICO
import org.memobase.rdf.WD
Jonas Waeber's avatar
Jonas Waeber committed
17
18
import java.io.StringWriter

Jonas Waeber's avatar
Jonas Waeber committed
19
20
/**
 * Builds an RDF description of an institution in a Jena model and serialises it as N-Triples.
 *
 * Statements created by [createInstitution] accumulate in a single model owned by this instance;
 * [write] serialises everything that has been added so far.
 * NOTE(review): presumably a fresh [Transform] is created per institution - confirm against the caller.
 *
 * @property municipalities municipality data keyed by postal code, used to enrich address locations.
 */
class Transform(private val municipalities: Map<String, Municipality>) {
    private val log = LogManager.getLogger("Transform")

    private val model = ModelFactory.createDefaultModel()

    // Most recently created institution resource; only written by createInstitution.
    private var resource: Resource? = null

    // URI of the most recently created institution; used as the record key in write().
    private var uri: String = ""

    /**
     * Adds the institution described by [input] to the model: identifier, names and descriptions
     * (de/fr/it), one location per address, and the ISIL, e-mail, website and online catalogue values.
     *
     * @param input the merged institution record to transform.
     * @return this instance, so that [write] can be chained.
     */
    fun createInstitution(input: MergedInstitution): Transform {
        val institution = model.createResource(NS.memint + input.id)
        resource = institution
        uri = institution.uri

        val identifier = model.createResource()
        identifier.addProperty(RDF.type, RICO.Identifier)
        identifier.addProperty(RICO.type, literal("main"))
        identifier.addProperty(RICO.identifier, literal(input.id))
        institution.addProperty(RICO.identifiedBy, identifier)

        // TODO: proper multi language integration!
        institution.addProperty(RICO.name, langLiteral(input.name.de, "de"))
        institution.addProperty(RICO.name, langLiteral(input.name.fr, "fr"))
        institution.addProperty(RICO.name, langLiteral(input.name.it, "it"))
        institution.addProperty(RICO.descriptiveNote, langLiteral(input.description.de, "de"))
        institution.addProperty(RICO.descriptiveNote, langLiteral(input.description.fr, "fr"))
        institution.addProperty(RICO.descriptiveNote, langLiteral(input.description.it, "it"))

        input.addresses.forEach { address ->
            extractAddressField(institution, address)
        }

        institution.addProperty(WD.isil, literal(input.isil))
        institution.addProperty(WD.emailAddress, literal(input.contactEmail))
        institution.addProperty(WD.website, literal(input.website))
        institution.addProperty(WD.onlineArchive, literal(input.onlineCatalogueLink))

        return this
    }

    /**
     * Serialises the whole model as UTF-8 N-Triples.
     *
     * @return a key/value pair of the institution URI and the trimmed N-Triples text.
     */
    fun write(): KeyValue<String, String> =
        StringWriter().use { writer ->
            RDFDataMgr.write(writer, model, RDFFormat.NTRIPLES_UTF8)
            KeyValue(uri, writer.toString().trim())
        }

    /**
     * Creates a blank-node location for [mergedAddress] and attaches it to [resource].
     *
     * For each of the languages de, fr and it the street, street number and combined street address
     * are added. Postal code and country are always added; canton, city and coordinates are only
     * added when the postal code is found in [municipalities].
     */
    private fun extractAddressField(resource: Resource, mergedAddress: MergedAddress) {
        val location = model.createResource()

        listOf("de", "fr", "it").forEach { language ->
            val streetAddress = mergedAddress.addressLine1.get(language)
            val secondAddressLine = mergedAddress.addressLine2.get(language)
            val combinedStreetAddress = if (secondAddressLine.isNotEmpty()) {
                streetAddress + "\n" + secondAddressLine
            } else {
                streetAddress
            }

            val (street, streetNumber) = splitStreetAddress(streetAddress)
            location.addProperty(WD.street, langLiteral(street, language))
            location.addProperty(WD.streetNumber, literal(streetNumber))
            location.addProperty(WD.streetAddress, langLiteral(combinedStreetAddress, language))
        }

        val postalCode = mergedAddress.postalCode.trim()
        val municipality = municipalities[postalCode]
        if (municipality == null) {
            // the input validation in drupal should ensure that this never happens.
            log.error("Invalid postal code: $postalCode")
        }

        location.addProperty(RDF.type, WD.location)
        location.addProperty(WD.postalCode, literal(postalCode))
        // does not enrich city, canton or coordinates, if the postal code is not in the list.
        if (municipality != null) {
            enrichWithMunicipality(location, municipality)
        }
        // country is currently hard coded to switzerland!
        location.addProperty(WD.country, WD.switzerland)
        resource.addProperty(WD.streetAddress, location)
    }

    /**
     * Splits an address line into street name and street number.
     *
     * The street number is whatever follows the last space of the trimmed line. Only that trailing
     * occurrence is removed to obtain the street, so a number that also appears inside the street
     * name (e.g. "Strasse des 17. Juni 17") is left intact. A line without any space is treated
     * entirely as the street number, leaving the street empty.
     *
     * @return the pair (street, streetNumber).
     */
    private fun splitStreetAddress(streetAddress: String): Pair<String, String> {
        // Trim first so that trailing whitespace does not produce an empty street number.
        val addressLine = streetAddress.trim()
        val streetNumber = addressLine.substringAfterLast(" ")
        val street = addressLine.removeSuffix(streetNumber).trim()
        return street to streetNumber
    }

    /** Adds canton, city and coordinates of [municipality] to [location]. */
    private fun enrichWithMunicipality(location: Resource, municipality: Municipality) {
        // canton
        location.addProperty(WD.adminUnit, model.createResource(municipality.canton))
        // city
        location.addProperty(WD.adminUnit, model.createResource(municipality.id))
        // coordinates
        municipality.coordinates.forEach { coordinate ->
            location.addProperty(WD.coordinates, literal(coordinate))
        }
    }

    /** Creates a language-tagged literal from the trimmed [text]. */
    private fun langLiteral(text: String, language: String): Literal = model.createLiteral(text.trim(), language)

    /** Creates a plain literal from the trimmed [text]. */
    private fun literal(text: String): Literal = model.createLiteral(text.trim())
}