/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
22
23
import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
import ch.memobase.settings.SettingsLoader
Jonas Waeber's avatar
Jonas Waeber committed
24
25
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
26
import java.io.StringWriter
Jonas Waeber's avatar
Jonas Waeber committed
27
28
import org.apache.kafka.streams.StreamsBuilder
import org.apache.kafka.streams.Topology
29
30
import org.apache.kafka.streams.kstream.KStream
import org.apache.kafka.streams.kstream.Predicate
Jonas Waeber's avatar
Jonas Waeber committed
31
import org.apache.logging.log4j.LogManager
32
import org.memobase.helpers.DocumentTypeMapper
33
import org.memobase.helpers.ElasticSearchWrapper
34
import org.memobase.helpers.InstitutionTypeMapper
Jonas Waeber's avatar
Jonas Waeber committed
35
import org.memobase.helpers.JSON
36
import org.memobase.helpers.KEYS.SettingsProps
37
import org.memobase.model.DocumentsSearchDoc
38
39
import org.memobase.model.InstitutionSearchDoc
import org.memobase.model.RecordSetSearchDoc
40
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
41
42

/**
 * Assembles the Kafka Streams [Topology] of the search doc service.
 *
 * Messages read from `settings.inputTopic` are parsed as JSON, unpacked and routed by
 * the tag key they contain (record, institution or record set). Each route is turned
 * into a search doc by its builder. The serialized search docs are written to
 * `settings.outputTopic`, and one [Report] per message is written to
 * `settings.processReportTopic`.
 *
 * @param settings loader providing the topic names and the application properties.
 */
class KafkaTopology(private val settings: SettingsLoader) {
    private val log = LogManager.getLogger("SearchDocService")

    private val appSettings = settings.appSettings
    private val mediaUrl = appSettings.getProperty(SettingsProps.mediaUrl)
    private val documentMapperPath = appSettings.getProperty(SettingsProps.documentTypeLabelsPath)
    private val institutionMapperPath = appSettings.getProperty(SettingsProps.institutionTypeLabelsPath)
    private val reportTopic = settings.processReportTopic

    private val documentTypeMapper = DocumentTypeMapper(documentMapperPath)
    private val institutionTypeMapper = InstitutionTypeMapper(institutionMapperPath)

    private val documentSearchDocBuilder = DocumentsSearchDocBuilder(documentTypeMapper, mediaUrl)
    private val institutionSearchDocBuilder = InstitutionSearchDocBuilder(institutionTypeMapper, appSettings)

    private val elasticSearchWrapper = ElasticSearchWrapper(appSettings)
    private val recordSetSearchDocBuilder =
            RecordSetSearchDocBuilder(elasticSearchWrapper)

    private val jsonWriter = ObjectMapper().registerKotlinModule().writer()

    /**
     * Wires the complete topology: input parsing, branching by tag, the three search doc
     * transformations and the sinks for search docs and reports.
     *
     * @return the topology built from a fresh [StreamsBuilder].
     */
    fun build(): Topology {
        val builder = StreamsBuilder()
        val branchedStream = builder
                .stream<String, String>(settings.inputTopic)
                .mapValues { value -> JSON.parse(value) }
                .filter { _, value -> value.isNotEmpty() }
                .mapValues { value -> JSON.unpack(value) }
                .branch(
                        Predicate { _, value -> value.containsKey(JSON.recordTag) },
                        Predicate { _, value -> value.containsKey(JSON.institutionTag) },
                        Predicate { _, value -> value.containsKey(JSON.recordSetTag) },
                        // Catch-all: everything that matched none of the tags above.
                        Predicate { _, _ -> true }
                )

        outputStreams(
                transformBranch(branchedStream[0], DocumentsSearchDoc.DEFAULT) { key, value ->
                    documentSearchDocBuilder.transform(key, value)
                }
        )
        outputStreams(
                transformBranch(branchedStream[1], InstitutionSearchDoc.DEFAULT) { key, value ->
                    institutionSearchDocBuilder.transform(key, value)
                }
        )
        outputStreams(
                transformBranch(branchedStream[2], RecordSetSearchDoc.DEFAULT) { key, value ->
                    recordSetSearchDocBuilder.transform(key, value)
                }
        )

        branchedStream[3]
                .mapValues { readOnlyKey, value ->
                    // Serialize with toJson() exactly like outputStreams does, so that every
                    // message on the report topic has the same representation.
                    Report(
                            readOnlyKey,
                            ReportStatus.fatal,
                            "No record, memobase institution or record set present in input data: $value.",
                            Service.name
                    ).toJson()
                }
                .to(reportTopic)

        return builder.build()
    }

    /**
     * Applies [transform] to every message of [stream] and pairs the result with a report.
     *
     * A successful transformation yields the search doc with a success report. When
     * [transform] throws an [InvalidInputException], [fallback] is paired with a warning
     * report carrying the exception message instead. Other exceptions are not caught.
     *
     * @param stream one branch of the unpacked input stream.
     * @param fallback placeholder search doc used when the input is invalid.
     * @param transform builder call that turns key and value into a search doc.
     * @return stream of (search doc, report) pairs.
     */
    private fun <V> transformBranch(
        stream: KStream<String, V>,
        fallback: Schema,
        transform: (String, V) -> Schema
    ): KStream<String, Pair<Schema, Report>> =
            stream.mapValues { readOnlyKey, value ->
                try {
                    Pair(
                            transform(readOnlyKey, value),
                            Report(readOnlyKey, ReportStatus.success, "Transformed message into search doc.", Service.name)
                    )
                } catch (ex: InvalidInputException) {
                    // NOTE(review): localizedMessage may be null if the exception was created
                    // without a message - confirm that Report accepts that.
                    Pair(fallback, Report(readOnlyKey, ReportStatus.warning, ex.localizedMessage, Service.name))
                }
            }

    /**
     * Attaches both sinks to a transformed branch: the JSON form of each report goes to
     * the report topic, and the search doc, serialized with Jackson, goes to the output topic.
     *
     * @param stream (search doc, report) pairs produced for one branch.
     */
    private fun outputStreams(stream: KStream<String, Pair<Schema, Report>>) {
        stream
                .mapValues { value -> value.second.toJson() }
                .to(reportTopic)

        stream
                // NOTE(review): the pairs built in this class only carry success or warning
                // reports, so this filter currently lets everything through, including the
                // DEFAULT placeholder docs of invalid inputs - confirm that this is intended.
                .filterNot { _, value -> value.second.status == ReportStatus.fatal }
                .mapValues { value ->
                    val out = StringWriter()
                    jsonWriter.writeValue(out, value.first)
                    out.toString()
                }
                .to(settings.outputTopic)
    }
}