KafkaTopology.kt 6.66 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Table Data Import Service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
22
23
import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
import ch.memobase.settings.SettingsLoader
Jonas Waeber's avatar
Jonas Waeber committed
24
25
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
26
import java.io.StringWriter
Jonas Waeber's avatar
Jonas Waeber committed
27
28
import org.apache.kafka.streams.StreamsBuilder
import org.apache.kafka.streams.Topology
29
30
import org.apache.kafka.streams.kstream.KStream
import org.apache.kafka.streams.kstream.Predicate
Jonas Waeber's avatar
Jonas Waeber committed
31
import org.apache.logging.log4j.LogManager
32
import org.memobase.helpers.DocumentTypeMapper
33
import org.memobase.helpers.ElasticSearchWrapper
34
import org.memobase.helpers.InstitutionTypeMapper
Jonas Waeber's avatar
Jonas Waeber committed
35
import org.memobase.helpers.JSON
36
import org.memobase.helpers.KEYS
37
import org.memobase.helpers.KEYS.SettingsProps
38
import org.memobase.model.DocumentsSearchDoc
39
40
import org.memobase.model.InstitutionSearchDoc
import org.memobase.model.RecordSetSearchDoc
41
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
42
43

/**
 * Builds the Kafka Streams topology of this service.
 *
 * Messages are read from [SettingsLoader.inputTopic], parsed as JSON and routed by the
 * top-level key they contain (record, institution or record set). Each routed message is
 * transformed into a search document which is written to [SettingsLoader.outputTopic];
 * a [Report] describing the outcome is written to [SettingsLoader.processReportTopic].
 *
 * @param settings loaded service settings providing the topic names and application properties.
 */
class KafkaTopology(private val settings: SettingsLoader) {
    private val log = LogManager.getLogger("SearchDocService")

    private val appSettings = settings.appSettings
    private val mediaUrl = appSettings.getProperty(SettingsProps.mediaUrl)
    private val documentMapperPath = appSettings.getProperty(SettingsProps.documentTypeLabelsPath)
    private val institutionMapperPath = appSettings.getProperty(SettingsProps.institutionTypeLabelsPath)
    private val reportTopic = settings.processReportTopic

    private val documentTypeMapper = DocumentTypeMapper(documentMapperPath)
    private val institutionTypeMapper = InstitutionTypeMapper(institutionMapperPath)

    private val documentSearchDocBuilder = DocumentsSearchDocBuilder(documentTypeMapper, mediaUrl)
    private val institutionSearchDoc = InstitutionSearchDocBuilder(institutionTypeMapper, appSettings)

    private val elasticSearchWrapper = ElasticSearchWrapper(settings.appSettings)
    private val recordSetSearchDocBuilder =
            RecordSetSearchDocBuilder(elasticSearchWrapper)

    private val jsonWriter = ObjectMapper().registerKotlinModule().writer()

    /**
     * Wires up the complete stream processing graph.
     *
     * Branch order matters: a message lands in the first branch whose predicate matches,
     * so the trailing catch-all only receives messages that contain none of the known keys.
     *
     * @return the topology ready to be passed to a Kafka Streams instance.
     */
    fun build(): Topology {
        val builder = StreamsBuilder()
        val stream = builder.stream<String, String>(settings.inputTopic)
        val branchedStream = stream
                .mapValues { value -> JSON.parse(value) }
                // Messages whose parse result is empty are dropped without a report.
                .filter { _, value -> value.isNotEmpty() }
                .mapValues { value -> JSON.unpack(value) }
                .branch(
                        Predicate { _, value -> value.containsKey(JSON.record) },
                        Predicate { _, value -> value.containsKey(JSON.institution) },
                        Predicate { _, value -> value.containsKey(JSON.recordSet) },
                        Predicate { _, _ -> true }
                )

        // NOTE(review): in the three transforming branches below an InvalidInputException
        // yields the DEFAULT doc together with a *warning* report. outputStreams only drops
        // *fatal* reports, so the DEFAULT doc is still written to the output topic.
        // Confirm this is intended.
        val recordStream = branchedStream[0]
                .mapValues { readOnlyKey, value ->
                    try {
                        Pair(
                                documentSearchDocBuilder.transform(value),
                                Report(readOnlyKey, ReportStatus.success, "Transformed message into search doc.", Service.name)
                        )
                    } catch (ex: InvalidInputException) {
                        Pair(DocumentsSearchDoc.DEFAULT, Report(readOnlyKey, ReportStatus.warning, ex.localizedMessage, Service.name))
                    }
                }
        outputStreams(recordStream)

        val institutionStream = branchedStream[1]
                .mapValues { readOnlyKey, value ->
                    try {
                        Pair(
                                institutionSearchDoc.transform(readOnlyKey, value),
                                Report(readOnlyKey, ReportStatus.success, "Transformed message into search doc.", Service.name)
                        )
                    } catch (ex: InvalidInputException) {
                        Pair(InstitutionSearchDoc.DEFAULT, Report(
                                readOnlyKey,
                                ReportStatus.warning,
                                ex.localizedMessage,
                                Service.name))
                    }
                }
        outputStreams(institutionStream)

        val recordSetStream = branchedStream[2]
                .mapValues { readOnlyKey, value ->
                    try {
                        Pair(
                                recordSetSearchDocBuilder.transform(readOnlyKey, value),
                                Report(readOnlyKey, ReportStatus.success, "Transformed message into search doc.", Service.name)
                        )
                    } catch (ex: InvalidInputException) {
                        Pair(RecordSetSearchDoc.DEFAULT, Report(
                                readOnlyKey,
                                ReportStatus.warning,
                                ex.localizedMessage,
                                Service.name))
                    }
                }
        outputStreams(recordSetStream)

        // Catch-all branch: nothing to index, only a fatal report is emitted.
        // The report is serialized with toJson() so that the report topic receives the same
        // string payload as the reports written by outputStreams.
        branchedStream[3]
                .mapValues { readOnlyKey, value ->
                    Report(
                            readOnlyKey,
                            ReportStatus.fatal,
                            "No record, memobase institution or record set present in input data: $value.",
                            Service.name
                    ).toJson()
                }
                .to(reportTopic)

        return builder.build()
    }

    /**
     * Attaches the two sinks shared by all transforming branches.
     *
     * The report of every message is written to the report topic as JSON. The search
     * document is written to the output topic as JSON unless its report status is fatal.
     *
     * @param stream pairs of the produced search document and the report describing its creation.
     */
    private fun outputStreams(stream: KStream<String, Pair<Schema, Report>>) {
        stream
                .mapValues { value -> value.second.toJson() }
                .to(reportTopic)

        stream
                .filterNot { _, value -> value.second.status == ReportStatus.fatal }
                .mapValues { value -> value.first }
                .mapValues { value ->
                    val out = StringWriter()
                    jsonWriter.writeValue(out, value)
                    out.toString()
                }
                .to(settings.outputTopic)
    }
}