/*
 * import-process-bridge
 * Copyright (C) 2021  Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.memobase

import ch.memobase.rdf.MB
import ch.memobase.rdf.RDF
import ch.memobase.rdf.RICO
import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
import ch.memobase.settings.HeaderExtractionTransformSupplier
import ch.memobase.settings.HeaderMetadata
import ch.memobase.settings.SettingsLoader
import org.apache.jena.rdf.model.Model
import org.apache.jena.riot.Lang
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RiotException
import org.apache.kafka.streams.StreamsBuilder
import org.apache.kafka.streams.Topology
import org.apache.kafka.streams.kstream.KStream
import org.apache.kafka.streams.kstream.Predicate
import org.apache.logging.log4j.LogManager
import org.memobase.model.MemobaseModel
import org.memobase.model.ProcessResult
import org.memobase.model.Transaction
import java.nio.charset.StandardCharsets

/**
 * Assembles the Kafka Streams topology of the import process bridge.
 *
 * Messages are read from the configured input topic, parsed as N-Triples, tagged with an event
 * type, serialised to a string and then routed by message key into one of three resource kinds
 * (records, record sets, institutions). For every message a report is written to the process
 * report topic.
 *
 * @property settings provides the topic names and application properties used by the topology.
 */
class KafkaTopology(
    private val settings: SettingsLoader
) {
    private val log = LogManager.getLogger("ImportProcessBridge")

    /** Name of this processing step as it appears in the emitted reports. */
    private val step = settings.appSettings.getProperty(Service.reportingStepNameProp)

    /**
     * Builds the stream topology.
     *
     * The message key decides the branch: keys containing `/record/`, `/recordSet/` or
     * `/institution/` are routed to the matching output, transaction and report topics.
     * Any other key only yields a single fatal report.
     *
     * @return the finished [Topology].
     */
    fun build(): Topology {
        val builder = StreamsBuilder()

        val stream = builder.stream<String, String>(settings.inputTopic)

        val processOutcome = stream
            .transformValues(HeaderExtractionTransformSupplier<String>())
            .mapValues { value -> parseNtriples(value) }
            .mapValues { value -> addEventType(value) }
            .mapValues { value -> writeJsonLd(value) }
            .branch(
                Predicate { key, _ -> key.contains("/record/") },
                Predicate { key, _ -> key.contains("/recordSet/") },
                Predicate { key, _ -> key.contains("/institution/") },
                // Catch-all: everything that matched none of the predicates above.
                Predicate { _, _ -> true }
            )

        val appSettings = settings.appSettings
        val topicOutPostfix = appSettings.getProperty("topicOutPostfix")

        routeBranch(
            processOutcome[0],
            settings.outputTopic + "-records-" + topicOutPostfix,
            appSettings.getProperty("topicTransactionsRecords")
        )
        routeBranch(
            processOutcome[1],
            settings.outputTopic + "-record-sets-" + topicOutPostfix,
            appSettings.getProperty("topicTransactionsRecordSets")
        )
        routeBranch(
            processOutcome[2],
            settings.outputTopic + "-institutions-" + topicOutPostfix,
            appSettings.getProperty("topicTransactionsInstitutions")
        )

        // Unmatched keys are reported exactly once as fatal; no data or transaction is written.
        processOutcome[3]
            .mapValues { key, value ->
                writeReport(
                    key,
                    ProcessResult(
                        value.data,
                        ReportStatus.fatal,
                        "Unable to match key type to either records, record sets or institutions."
                    ),
                    step
                )
            }
            .to(settings.processReportTopic)

        return builder.build()
    }

    /**
     * Wires the three sinks of one resource branch.
     *
     * Results whose status is not [ReportStatus.fatal] have their data sent to [outputTopic] and a
     * transaction message sent to [transactionTopic]. A report is written to the process report
     * topic for every result, fatal or not.
     *
     * @param branch stream of processing results for one resource kind.
     * @param outputTopic topic receiving the serialised data of non-fatal results.
     * @param transactionTopic topic receiving the transaction messages of non-fatal results.
     */
    private fun routeBranch(
        branch: KStream<String, ProcessResult<String>>,
        outputTopic: String,
        transactionTopic: String
    ) {
        // Data and transactions share the same filter so that no transaction is announced
        // for a resource whose data was dropped.
        val accepted = branch.filter { _, value -> value.status != ReportStatus.fatal }

        accepted
            .mapValues { value -> value.data }
            .to(outputTopic)

        accepted
            .mapValues { key, _ -> writeTransaction(key) }
            .to(transactionTopic)

        branch
            .mapValues { key, value -> writeReport(key, value, step) }
            .to(settings.processReportTopic)
    }

    /**
     * Parses the message payload as N-Triples into a fresh [MemobaseModel].
     *
     * @param input pair of the raw payload and its header metadata; only the payload is read here.
     * @return a successful result holding the parsed model, or a fatal result (holding whatever
     *         the model contained when parsing stopped) if a [RiotException] was raised.
     */
    private fun parseNtriples(input: Pair<String, HeaderMetadata>): ProcessResult<Model> {
        val model = MemobaseModel()
        try {
            RDFDataMgr.read(model, input.first.byteInputStream(StandardCharsets.UTF_8), Lang.NT)
        } catch (ex: RiotException) {
            log.error("Parsing error: ${ex.message}")
            // Hand the throwable to the logger so that the stack trace is actually rendered.
            log.debug("Parsing error stack trace", ex)
            return ProcessResult(model, ReportStatus.fatal, "Parsing of Ntriples failed!")
        }
        return ProcessResult(model, ReportStatus.success, "Parsing successful")
    }

    /**
     * Turns the model of a successful result into its string form.
     *
     * The serialisation is whatever `toString()` of the model yields
     * (NOTE(review): presumably JSON-LD via [MemobaseModel] - confirm).
     *
     * @param input result of the previous processing stages.
     * @return the serialised model for successful results; otherwise an empty payload carrying
     *         the original status and message.
     */
    private fun writeJsonLd(input: ProcessResult<Model>): ProcessResult<String> {
        return if (input.status != ReportStatus.success) {
            ProcessResult("", input.status, input.message)
        } else {
            ProcessResult(input.data.toString(), input.status, "Transformation successful")
        }
    }

    /**
     * Adds the type of the event message to the core resources.
     * Core resources are rico:Record, rico:CorporateBody (for institutions), rico:RecordSet and
     * rico:Instantiation. For each of these types only the first subject found in the model
     * receives the property.
     * Downstream services should remove this property before publishing the data to the outside world!
     *
     * @param input result whose model is modified in place.
     * @return the same [input] instance.
     */
    private fun addEventType(input: ProcessResult<Model>): ProcessResult<Model> {
        listOf(RICO.Record, RICO.CorporateBody, RICO.RecordSet, RICO.Instantiation).forEach { resourceType ->
            input.data
                .listSubjectsWithProperty(RDF.type, resourceType)
                .toList()
                .firstOrNull()
                ?.addProperty(MB.eventType, "CREATE")
        }
        return input
    }

    /**
     * Serialises a processing result as a JSON report.
     *
     * @param id identifier of the report (the message key).
     * @param input result providing status and message of the report.
     * @param step name of the processing step the report is attributed to.
     * @return the report as JSON string.
     */
    private fun writeReport(id: String, input: ProcessResult<String>, step: String): String {
        return Report(id, input.status, input.message, step).toJson()
    }

    /**
     * Creates the JSON form of a [Transaction] for the given identifier.
     *
     * @param id identifier of the transaction (the message key).
     * @return the transaction as JSON string.
     */
    private fun writeTransaction(id: String): String {
        return Transaction(id).toJson()
    }
}