SAXContentHandler.kt 7.11 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * xml-data-transform
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

Jonas Waeber's avatar
Jonas Waeber committed
19
package org.memobase.xml
Jonas Waeber's avatar
Jonas Waeber committed
20

Jonas Waeber's avatar
Jonas Waeber committed
21
22
import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
23
import com.beust.klaxon.JsonArray
Jonas Waeber's avatar
Jonas Waeber committed
24
import com.beust.klaxon.JsonObject
Jonas Waeber's avatar
Jonas Waeber committed
25
import java.io.StringWriter
26
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
27
import org.memobase.Service
Jonas Waeber's avatar
Jonas Waeber committed
28
29
30
31
import org.xml.sax.Attributes
import org.xml.sax.ContentHandler
import org.xml.sax.Locator

Jonas Waeber's avatar
Jonas Waeber committed
32
33
34
35
36
37
38
39
40
41
/**
 * SAX [ContentHandler] that converts a flat XML record into a JSON object.
 *
 * The input is expected to be flat (pre-processed with an XSLT if necessary): below the record
 * element only one further level of nesting is handled. Attributes are not transformed; if an
 * element carries attributes, this is recorded as an error in the resulting report.
 *
 * Repeated tags are collected into a JSON array. An element with child elements becomes a JSON
 * object built from its children; any direct text content of such an element is dropped.
 *
 * @param key The key of the kafka message. Used as report id when no identifier was found.
 * @param identifierFieldName The field name of the unique identifier of this record.
 * @param recordTag The root tag of the xml structure.
 */
class SAXContentHandler(
    private val key: String,
    private val identifierFieldName: String,
    private val recordTag: String
) : ContentHandler {
    private val log = LogManager.getLogger("SAXHandler")

    /**
     * The json representation of the xml stream after processing. Written in [endDocument].
     */
    val output = StringWriter()

    /**
     * The identifier is used as a message key for the outgoing message.
     * Stays `null` until an element named [identifierFieldName] has been closed.
     */
    var identifier: String? = null
    private var report: Report? = null
    private val jsonResult = JsonObject()

    /**
     * @return A report on the status of the transformation. If [endDocument] has not been
     * reached yet, a fatal placeholder report is returned instead.
     */
    fun getReport(): Report =
        report ?: Report(
            identifier ?: key,
            ReportStatus.fatal,
            "Unknown Failure: No report found.",
            Service.name
        )

    // Name of the currently open first-level element ("" when none is open).
    private var currentElementTag: String = ""

    // Name of the currently open second-level element ("" when none is open).
    private var currentInnerElementTag: String = ""

    // Raw text collected for the open elements. The text is buffered untrimmed because a parser
    // may deliver one text node in several chunks; trimming happens once in endElement.
    private val currentElementContent = StringBuilder()
    private val currentInnerElementContent = StringBuilder()

    // (tag, text) pairs of the children of the currently open first-level element.
    private val innerElements = mutableListOf<Pair<String, String>>()

    private val invalidPropertyNameCharacters = Regex("[.+,\\\\]")

    // Accumulated error messages, one per line. A non-empty text makes the final report fatal.
    private var reportText = ""

    override fun setDocumentLocator(p0: Locator?) {
        // NOT NEEDED...
    }

    override fun startDocument() {
        // Do nothing...
    }

    override fun processingInstruction(p0: String?, p1: String?) {
        // NOT NEEDED
    }

    override fun skippedEntity(p0: String?) {
        // NOT NEEDED
    }

    override fun ignorableWhitespace(p0: CharArray?, p1: Int, p2: Int) {
        // Do Nothing
    }

    override fun startPrefixMapping(p0: String?, p1: String?) {
        // NOT NEEDED
    }

    override fun endPrefixMapping(p0: String?) {
        // NOT NEEDED.
    }

    /**
     * Buffers character data for the innermost open element.
     *
     * Only the slice `[start, start + size)` of [characters] is read; the rest of the array
     * belongs to the parser. Non-blank text outside of any tracked element is logged and dropped.
     */
    override fun characters(characters: CharArray?, start: Int, size: Int) {
        if (characters == null || size <= 0) return
        val chunk = String(characters, start, size)
        when {
            currentInnerElementTag != "" -> currentInnerElementContent.append(chunk)
            currentElementTag != "" -> currentElementContent.append(chunk)
            else -> {
                val stray = chunk.trim()
                if (stray.isNotEmpty()) {
                    log.warn("Content without Tags: $stray.")
                }
            }
        }
    }

    /**
     * Tracks the opened element as either the current first-level or the current inner element.
     *
     * Problems (attributes present, missing local name, invalid characters in the name) are
     * appended to the report text. The record tag itself is skipped.
     */
    override fun startElement(uri: String?, localName: String?, qName: String?, attributes: Attributes?) {
        if (attributes != null && attributes.length > 0)
            reportText += "Found an attribute. There should be non left...\n"
        if (localName == null) {
            reportText += "Found no local name in element $uri.\n"
            return
        }
        if (localName.contains(invalidPropertyNameCharacters)) {
            // Terminated with a newline like every other report line, so that the following
            // message does not run into this one.
            reportText += "Contains invalid characters in property '$localName': $invalidPropertyNameCharacters.\n"
            return
        }
        if (recordTag == localName)
            return
        if (currentElementTag == "") {
            currentElementTag = localName
        } else {
            currentInnerElementTag = localName
        }
    }

    /**
     * Closes the current first-level or inner element and stores its value.
     *
     * A first-level element is written to the JSON result (text content, or an object of its
     * children). An inner element is remembered until its parent closes. Anything else is
     * reported as an unmatched end element.
     */
    override fun endElement(uri: String?, localName: String?, qName: String?) {
        if (recordTag == localName)
            return

        when {
            currentElementTag == localName -> {
                val content = currentElementContent.toString().trim()
                // extract the identifier value based on the given field name from configuration.
                // this is used for the message key
                if (currentElementTag == identifierFieldName) {
                    identifier = content
                }
                if (innerElements.isEmpty()) {
                    if (content.isNotEmpty()) {
                        addToResult(currentElementTag, content)
                    }
                } else {
                    addToResult(currentElementTag, innerElements.toMap())
                    innerElements.clear()
                }
                currentElementTag = ""
                currentElementContent.setLength(0)
            }
            currentInnerElementTag == localName -> {
                val content = currentInnerElementContent.toString().trim()
                if (content.isNotEmpty()) {
                    innerElements.add(Pair(currentInnerElementTag, content))
                }
                currentInnerElementTag = ""
                currentInnerElementContent.setLength(0)
            }
            else -> {
                reportText += "Unmatched end element: $localName.\n"
            }
        }
    }

    /**
     * Stores [value] under [tag]; a repeated tag is turned into / appended to a list.
     *
     * The check is against `List` rather than `JsonArray`: appending with `+` yields a plain
     * list, which would otherwise be wrapped into a nested array on the next occurrence.
     */
    private fun addToResult(tag: String, value: Any) {
        jsonResult[tag] = when (val existing = jsonResult[tag]) {
            null -> value
            is List<*> -> existing + value
            else -> JsonArray(existing, value)
        }
    }

    /**
     * Serialises the collected JSON into [output] and creates the final report: success when no
     * problems were recorded, fatal (with the collected messages) otherwise.
     */
    override fun endDocument() {
        output.write(jsonResult.toJsonString())
        val succeeded = reportText.isEmpty()
        report = Report(
            id = identifier ?: key,
            status = if (succeeded) ReportStatus.success else ReportStatus.fatal,
            message = if (succeeded) "Successfully transformed xml to json!" else reportText.trim(),
            step = Service.name
        )
    }
}