TableParser.kt 12.2 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
/*
 * Table Data Import Service
Jonas Waeber's avatar
Jonas Waeber committed
3
 * Copyright (C) 2020-2021 Memoriav
Jonas Waeber's avatar
Jonas Waeber committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package org.memobase

20
import ch.memobase.exceptions.InvalidInputException
Jonas Waeber's avatar
Jonas Waeber committed
21
22
23
import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
import ch.memobase.settings.HeaderMetadata
Jonas Waeber's avatar
Jonas Waeber committed
24
25
import com.beust.klaxon.json
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
Jonas Waeber's avatar
Jonas Waeber committed
26
import com.github.doyaaaaaken.kotlincsv.util.CSVFieldNumDifferentException
Jonas Waeber's avatar
Jonas Waeber committed
27
import java.io.InputStream
Jonas Waeber's avatar
Jonas Waeber committed
28
29
30
31
32
33
import org.apache.poi.ss.usermodel.WorkbookFactory
import org.memobase.UtilityFunctions.retrieveCellValue
import org.memobase.UtilityFunctions.retrieveCells
import org.memobase.UtilityFunctions.zip
import org.memobase.models.Formats
import org.memobase.models.Message
34
import org.memobase.models.ReportMessages
Jonas Waeber's avatar
Jonas Waeber committed
35
36
import org.memobase.models.ResultMessage

Jonas Waeber's avatar
Jonas Waeber committed
37
class TableParser(private val step: String) {
Jonas Waeber's avatar
Jonas Waeber committed
38
39
    private val invalidPropertyNameCharacters = listOf('.', ':', '/', '+')

Jonas Waeber's avatar
Jonas Waeber committed
40
41
42
43
44
45
    fun parseTable(
        key: String,
        inputMessage: Message,
        metadata: HeaderMetadata,
        inputStream: InputStream
    ): List<ResultMessage> {
Jonas Waeber's avatar
Jonas Waeber committed
46
        return try {
Jonas Waeber's avatar
Jonas Waeber committed
47
            when (inputMessage.format) {
Jonas Waeber's avatar
Jonas Waeber committed
48
49
                Formats.xls, Formats.xlsx -> excelMapper(key, metadata, inputStream)
                Formats.csv, Formats.tsv -> csvMapper(key, inputMessage, metadata, inputStream)
50
                else -> throw InvalidInputException("Cannot parse the table with format ${inputMessage.format}.")
Jonas Waeber's avatar
Jonas Waeber committed
51
            }
Jonas Waeber's avatar
Jonas Waeber committed
52
53
        } catch (ex: CSVFieldNumDifferentException) {
            listOf(
Jonas Waeber's avatar
Jonas Waeber committed
54
55
56
57
58
59
                ResultMessage(
                    key,
                    null,
                    Report(
                        key,
                        ReportStatus.fatal,
60
                        ReportMessages.fatalCSV(ex.localizedMessage),
Jonas Waeber's avatar
Jonas Waeber committed
61
62
63
64
                        step
                    )
                )
            )
Jonas Waeber's avatar
Jonas Waeber committed
65
        } catch (ex: InvalidInputException) {
Jonas Waeber's avatar
Jonas Waeber committed
66
            listOf(
Jonas Waeber's avatar
Jonas Waeber committed
67
68
69
70
71
72
                ResultMessage(
                    key,
                    null,
                    Report(
                        key,
                        ReportStatus.fatal,
73
                        ReportMessages.fatalInput(ex.localizedMessage),
Jonas Waeber's avatar
Jonas Waeber committed
74
75
76
77
                        step
                    )
                )
            )
Jonas Waeber's avatar
Jonas Waeber committed
78
        } catch (ex: IllegalArgumentException) { // Sheet index does not exist
Jonas Waeber's avatar
Jonas Waeber committed
79
80
81
82
83
84
            listOf(
                ResultMessage(
                    key, null,
                    Report(
                        key,
                        ReportStatus.fatal,
85
86
87
88
89
90
91
92
93
94
95
96
97
                        ReportMessages.fatalSheetIndex(metadata.tableSheetIndex),
                        step
                    )
                )
            )
        } catch (ex: Exception) {
            listOf(
                ResultMessage(
                    key, null,
                    Report(
                        key,
                        ReportStatus.fatal,
                        ReportMessages.fatalUnknown(ex::class.java.simpleName, ex.localizedMessage),
Jonas Waeber's avatar
Jonas Waeber committed
98
99
100
101
                        step
                    )
                )
            )
Jonas Waeber's avatar
Jonas Waeber committed
102
103
104
        }
    }

Jonas Waeber's avatar
Jonas Waeber committed
105
106
107
108
109
110
    private fun csvMapper(
        key: String,
        value: Message,
        metadata: HeaderMetadata,
        inputStream: InputStream
    ): List<ResultMessage> {
Jonas Waeber's avatar
Jonas Waeber committed
111
        val resultMessages = mutableListOf<ResultMessage>()
Jonas Waeber's avatar
Jonas Waeber committed
112
113
        val identifierSet = mutableSetOf<String>()

114
        val reader =
Jonas Waeber's avatar
Jonas Waeber committed
115
116
117
118
119
120
            csvReader {
                this.quoteChar = '"'
                this.delimiter = if (value.format == Formats.csv) ',' else '\t'
                this.charset = Charsets.UTF_8.displayName()
                // this.skipEmptyLine = true
            }.readAll(inputStream)
121
122
123
124
        var headerProperties = emptyList<String>()
        var count = 0
        for (line in reader) {
            count += 1
Jonas Waeber's avatar
Jonas Waeber committed
125
126
            if (count <= metadata.tableHeaderCount) {
                if (count == metadata.tableHeaderIndex) {
127
128
129
130
131
                    headerProperties = line
                    headerProperties.forEachIndexed { index, property ->
                        val trimmedProperty = property.trim()
                        if (trimmedProperty.isEmpty()) {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
132
                                "Missing a property name on row $count in column ${index + 1}!"
133
134
135
136
                            )
                        }
                        if (trimmedProperty.any { value -> invalidPropertyNameCharacters.contains(value) }) {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
137
                                "Invalid property name $trimmedProperty on row $count in column ${index + 1}! You may not use the any of the following characters: + , . "
138
                            )
Jonas Waeber's avatar
Jonas Waeber committed
139
140
141
                        }
                    }
                }
142
143
144
145
                continue
            }
            // the -1 ensures, that users can start columns beginning at 1!
            val identifier: String = try {
Jonas Waeber's avatar
Jonas Waeber committed
146
                line[metadata.tableIdentifierIndex - 1].let { identifierValue ->
147
148
149
                    when (identifierValue) {
                        "" -> {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
150
                                "The row $count has an empty identifier in column ${metadata.tableIdentifierIndex}."
151
                            )
Jonas Waeber's avatar
Jonas Waeber committed
152
                        }
Jonas Waeber's avatar
Jonas Waeber committed
153
                        in identifierSet -> {
154
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
155
                                "The row $count contains a duplicated identifier in column ${metadata.tableIdentifierIndex} with another row."
Jonas Waeber's avatar
Jonas Waeber committed
156
                            )
157
158
                        }
                        else -> {
Jonas Waeber's avatar
Jonas Waeber committed
159
                            identifierSet.add(identifierValue)
160
161
162
                            identifierValue
                        }
                    }
Jonas Waeber's avatar
Jonas Waeber committed
163
                }
164
165
            } catch (ex: InvalidInputException) {
                resultMessages.add(
Jonas Waeber's avatar
Jonas Waeber committed
166
167
168
169
170
171
                    ResultMessage(
                        key,
                        null,
                        Report(
                            key,
                            ReportStatus.fatal,
172
                            ReportMessages.fatalInput(ex.localizedMessage),
Jonas Waeber's avatar
Jonas Waeber committed
173
                            step
174
                        )
Jonas Waeber's avatar
Jonas Waeber committed
175
                    )
Jonas Waeber's avatar
Jonas Waeber committed
176
                )
177
                continue
Jonas Waeber's avatar
Jonas Waeber committed
178
            }
179
180
            val keyValueMap = json {
                obj(
Jonas Waeber's avatar
Jonas Waeber committed
181
                    zip(headerProperties, line)
182
183
184
                )
            }
            val report = Report(
Jonas Waeber's avatar
Jonas Waeber committed
185
186
                identifier,
                ReportStatus.success,
187
                ReportMessages.success(),
Jonas Waeber's avatar
Jonas Waeber committed
188
                step
189
190
            )
            resultMessages.add(ResultMessage(identifier, keyValueMap, report))
Jonas Waeber's avatar
Jonas Waeber committed
191
192
193
194
        }
        return resultMessages
    }

Jonas Waeber's avatar
Jonas Waeber committed
195
196
197
198
199
    private fun excelMapper(
        key: String,
        metadata: HeaderMetadata,
        inputStream: InputStream
    ): List<ResultMessage> {
200
201
        val identifierSet = mutableSetOf<String>()
        val propertiesList = mutableListOf<String>()
202
203
        inputStream.use { inputStream1 ->
            WorkbookFactory.create(inputStream1).use { workbook ->
Jonas Waeber's avatar
Jonas Waeber committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
                // sheet index is 0-based. This ensures that users can access sheet 1 with index 1!
                val sheet = workbook.getSheetAt(metadata.tableSheetIndex - 1)
                var count = 0
                return sheet.filterEmptyRows().map { row ->
                    count += 1
                    if (count <= metadata.tableHeaderCount) {
                        if (count == metadata.tableHeaderIndex) {
                            propertiesList.addAll(row.map { cell ->
                                if (retrieveCellValue(cell).isNotEmpty()) {
                                    if (retrieveCellValue(cell).any { char ->
                                            invalidPropertyNameCharacters.contains(
                                                char
                                            )
                                        }) {
                                        throw InvalidInputException(
                                            "The property in cell ${cell.address} contains one or more invalid characters: $invalidPropertyNameCharacters."
                                        )
                                    } else {
                                        retrieveCellValue(cell)
                                    }
                                } else {
Jonas Waeber's avatar
Jonas Waeber committed
225
                                    throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
226
                                        "The header index is missing a value in cell ${cell.address}"
Jonas Waeber's avatar
Jonas Waeber committed
227
228
                                    )
                                }
Jonas Waeber's avatar
Jonas Waeber committed
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
                            }.map { it.trim() })
                        }
                        null
                    } else {
                        val rowIdentifier: String = try {
                            row.getCell(metadata.tableIdentifierIndex - 1).let { cell ->
                                if (cell != null) {
                                    when (val cellValue = retrieveCellValue(cell)) {
                                        "" -> {
                                            throw InvalidInputException(
                                                "The row ${row.rowNum} has an empty identifier in column ${metadata.tableIdentifierIndex}."
                                            )
                                        }
                                        in identifierSet -> {
                                            throw InvalidInputException(
                                                "The row ${row.rowNum} contains a duplicated identifier in column ${metadata.tableIdentifierIndex} with another row."
                                            )
                                        }
                                        else -> {
                                            identifierSet.add(cellValue)
                                            cellValue
                                        }
                                    }
                                } else {
Jonas Waeber's avatar
Jonas Waeber committed
253
                                    throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
254
                                        "No cell found in row ${row.rowNum} for column ${metadata.tableIdentifierIndex}."
Jonas Waeber's avatar
Jonas Waeber committed
255
256
257
                                    )
                                }
                            }
Jonas Waeber's avatar
Jonas Waeber committed
258
259
260
261
262
263
264
265
                        } catch (ex: InvalidInputException) {
                            return@map ResultMessage(
                                key, null, Report(
                                    key,
                                    ReportStatus.fatal,
                                    ReportMessages.fatalInput(ex.localizedMessage),
                                    step
                                )
Jonas Waeber's avatar
Jonas Waeber committed
266
267
                            )
                        }
268

Jonas Waeber's avatar
Jonas Waeber committed
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
                        val jsonObject = json {
                            obj(
                                zip(
                                    propertiesList,
                                    retrieveCells(row, propertiesList.size - 1)
                                )
                            )
                        }
                        ResultMessage(
                            rowIdentifier, jsonObject, Report(
                                rowIdentifier,
                                ReportStatus.success,
                                ReportMessages.success(),
                                step
                            )
Jonas Waeber's avatar
Jonas Waeber committed
284
                        )
Jonas Waeber's avatar
Jonas Waeber committed
285
286
287
                    }
                    // Empty rows create a null result. These are removed.
                }.filterNotNull()
Jonas Waeber's avatar
Jonas Waeber committed
288
            }
Jonas Waeber's avatar
Jonas Waeber committed
289
        }
Jonas Waeber's avatar
Jonas Waeber committed
290
291
    }
}