TableParser.kt 11.3 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
/*
 * Table Data Import Service
Jonas Waeber's avatar
Jonas Waeber committed
3
 * Copyright (C) 2020-2021 Memoriav
Jonas Waeber's avatar
Jonas Waeber committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package org.memobase

20
import ch.memobase.exceptions.InvalidInputException
Jonas Waeber's avatar
Jonas Waeber committed
21
22
23
24
25
import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
import ch.memobase.settings.HeaderMetadata
import ch.memobase.settings.SettingsLoader
import ch.memobase.sftp.SftpClient
Jonas Waeber's avatar
Jonas Waeber committed
26
27
import com.beust.klaxon.json
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
Jonas Waeber's avatar
Jonas Waeber committed
28
import com.github.doyaaaaaken.kotlincsv.util.CSVFieldNumDifferentException
Jonas Waeber's avatar
Jonas Waeber committed
29
30
31
32
33
34
35
36
37
38
39
40
41
import java.io.File
import org.apache.poi.ss.usermodel.WorkbookFactory
import org.memobase.UtilityFunctions.retrieveCellValue
import org.memobase.UtilityFunctions.retrieveCells
import org.memobase.UtilityFunctions.zip
import org.memobase.models.Formats
import org.memobase.models.Message
import org.memobase.models.ResultMessage

class TableParser(settings: SettingsLoader) {
    private val sftpClient: SftpClient = SftpClient(settings.sftpSettings)
    private val invalidPropertyNameCharacters = listOf('.', ':', '/', '+')

Jonas Waeber's avatar
Jonas Waeber committed
42
    fun parseTable(key: String, inputMessage: Message, metadata: HeaderMetadata): List<ResultMessage> {
Jonas Waeber's avatar
Jonas Waeber committed
43
        return try {
Jonas Waeber's avatar
Jonas Waeber committed
44
45
46
            when (inputMessage.format) {
                Formats.xls, Formats.xlsx -> excelMapper(key, inputMessage, metadata)
                Formats.csv, Formats.tsv -> csvMapper(key, inputMessage, metadata)
47
                else -> throw InvalidInputException("Cannot parse the table with format ${inputMessage.format}.")
Jonas Waeber's avatar
Jonas Waeber committed
48
            }
Jonas Waeber's avatar
Jonas Waeber committed
49
50
51
52
53
54
55
56
57
        } catch (ex: CSVFieldNumDifferentException) {
            listOf(
                    ResultMessage(key,
                            null,
                            Report(key,
                                    ReportStatus.fatal,
                                    "Invalid CSV Input: ${ex.localizedMessage}.",
                                    Service.name)
                    ))
Jonas Waeber's avatar
Jonas Waeber committed
58
        } catch (ex: InvalidInputException) {
Jonas Waeber's avatar
Jonas Waeber committed
59
60
61
            listOf(
                    ResultMessage(key,
                            null,
62
                            Report(key,
Jonas Waeber's avatar
Jonas Waeber committed
63
64
65
66
                                    ReportStatus.fatal,
                                    "Invalid Input: ${ex.localizedMessage}. Could not process any lines.",
                                    Service.name)
                    ))
Jonas Waeber's avatar
Jonas Waeber committed
67
        } catch (ex: IllegalArgumentException) { // Sheet index does not exist
Jonas Waeber's avatar
Jonas Waeber committed
68
69
70
71
72
73
            listOf(ResultMessage(key, null,
                    Report(key,
                            ReportStatus.fatal,
                            "Invalid Sheet Index provided. The sheet index ${metadata.tableSheetIndex} does not exist.",
                            Service.name)
            ))
Jonas Waeber's avatar
Jonas Waeber committed
74
75
76
        }
    }

Jonas Waeber's avatar
Jonas Waeber committed
77
    private fun csvMapper(key: String, value: Message, metadata: HeaderMetadata): List<ResultMessage> {
Jonas Waeber's avatar
Jonas Waeber committed
78
        val resultMessages = mutableListOf<ResultMessage>()
Jonas Waeber's avatar
Jonas Waeber committed
79
        val identifierSet = mutableSetOf<String>()
80
        val inputStream = sftpClient.open(File(value.path))
Jonas Waeber's avatar
Jonas Waeber committed
81

82
        val reader =
Jonas Waeber's avatar
Jonas Waeber committed
83
84
85
86
87
88
                csvReader {
                    this.quoteChar = '"'
                    this.delimiter = if (value.format == Formats.csv) ',' else '\t'
                    this.charset = Charsets.UTF_8.displayName()
                    // this.skipEmptyLine = true
                }.readAll(inputStream)
89
90
91
92
        var headerProperties = emptyList<String>()
        var count = 0
        for (line in reader) {
            count += 1
Jonas Waeber's avatar
Jonas Waeber committed
93
94
            if (count <= metadata.tableHeaderCount) {
                if (count == metadata.tableHeaderIndex) {
95
96
97
98
99
                    headerProperties = line
                    headerProperties.forEachIndexed { index, property ->
                        val trimmedProperty = property.trim()
                        if (trimmedProperty.isEmpty()) {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
100
                                    "Missing a property name on row $count in column ${index + 1}!"
101
102
103
104
                            )
                        }
                        if (trimmedProperty.any { value -> invalidPropertyNameCharacters.contains(value) }) {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
105
                                    "Invalid property name $trimmedProperty on row $count in column ${index + 1}! You may not use the any of the following characters: + , . "
106
                            )
Jonas Waeber's avatar
Jonas Waeber committed
107
108
109
                        }
                    }
                }
110
111
112
113
                continue
            }
            // the -1 ensures, that users can start columns beginning at 1!
            val identifier: String = try {
Jonas Waeber's avatar
Jonas Waeber committed
114
                line[metadata.tableIdentifierIndex - 1].let { identifierValue ->
115
116
117
                    when (identifierValue) {
                        "" -> {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
118
                                    "The row $count has an empty identifier in column ${metadata.tableIdentifierIndex}."
119
                            )
Jonas Waeber's avatar
Jonas Waeber committed
120
                        }
Jonas Waeber's avatar
Jonas Waeber committed
121
                        in identifierSet -> {
122
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
123
                                    "The row $count contains a duplicated identifier in column ${metadata.tableIdentifierIndex} with another row."
Jonas Waeber's avatar
Jonas Waeber committed
124
                            )
125
126
                        }
                        else -> {
Jonas Waeber's avatar
Jonas Waeber committed
127
                            identifierSet.add(identifierValue)
128
129
130
                            identifierValue
                        }
                    }
Jonas Waeber's avatar
Jonas Waeber committed
131
                }
132
133
            } catch (ex: InvalidInputException) {
                resultMessages.add(
Jonas Waeber's avatar
Jonas Waeber committed
134
                        ResultMessage(
135
                                key,
Jonas Waeber's avatar
Jonas Waeber committed
136
137
                                null,
                                Report(
138
                                        key,
Jonas Waeber's avatar
Jonas Waeber committed
139
140
141
142
                                        ReportStatus.fatal,
                                        ex.localizedMessage,
                                        Service.name
                                )
143
                        )
Jonas Waeber's avatar
Jonas Waeber committed
144
                )
145
                continue
Jonas Waeber's avatar
Jonas Waeber committed
146
            }
147
148
            val keyValueMap = json {
                obj(
Jonas Waeber's avatar
Jonas Waeber committed
149
                        zip(headerProperties, line)
150
151
152
                )
            }
            val report = Report(
Jonas Waeber's avatar
Jonas Waeber committed
153
154
155
156
                    identifier,
                    ReportStatus.success,
                    "",
                    Service.name
157
158
            )
            resultMessages.add(ResultMessage(identifier, keyValueMap, report))
Jonas Waeber's avatar
Jonas Waeber committed
159
160
161
162
        }
        return resultMessages
    }

Jonas Waeber's avatar
Jonas Waeber committed
163
    private fun excelMapper(key: String, value: Message, metadata: HeaderMetadata): List<ResultMessage> {
164
165
166
167
168
169
170
        val inputStream = sftpClient.open(File(value.path))
        val workbook = WorkbookFactory.create(inputStream)
        // only XSL stream closes the input stream. The XSLX stream does not
        inputStream.close()
        val identifierSet = mutableSetOf<String>()
        val propertiesList = mutableListOf<String>()
        // sheet index is 0-based. This ensures that users can access sheet 1 with index 1!
Jonas Waeber's avatar
Jonas Waeber committed
171
        val sheet = workbook.getSheetAt(metadata.tableSheetIndex - 1)
172
173
174
        var count = 0
        return sheet.filterEmptyRows().map { row ->
            count += 1
Jonas Waeber's avatar
Jonas Waeber committed
175
176
            if (count <= metadata.tableHeaderCount) {
                if (count == metadata.tableHeaderIndex) {
177
178
179
                    propertiesList.addAll(row.map { cell ->
                        if (retrieveCellValue(cell).isNotEmpty()) {
                            if (retrieveCellValue(cell).any { char ->
Jonas Waeber's avatar
Jonas Waeber committed
180
181
182
183
                                        invalidPropertyNameCharacters.contains(
                                                char
                                        )
                                    }) {
184
                                throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
185
                                        "The property in cell ${cell.address} contains one or more invalid characters: $invalidPropertyNameCharacters."
186
187
188
189
190
191
                                )
                            } else {
                                retrieveCellValue(cell)
                            }
                        } else {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
192
                                    "The header index is missing a value in cell ${cell.address}"
193
                            )
Jonas Waeber's avatar
Jonas Waeber committed
194
                        }
195
196
197
198
199
                    }.map { it.trim() })
                }
                null
            } else {
                val rowIdentifier: String = try {
Jonas Waeber's avatar
Jonas Waeber committed
200
                    row.getCell(metadata.tableIdentifierIndex - 1).let { cell ->
201
202
203
                        if (cell != null) {
                            when (val cellValue = retrieveCellValue(cell)) {
                                "" -> {
Jonas Waeber's avatar
Jonas Waeber committed
204
                                    throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
205
                                            "The row ${row.rowNum} has an empty identifier in column ${metadata.tableIdentifierIndex}."
Jonas Waeber's avatar
Jonas Waeber committed
206
207
                                    )
                                }
208
                                in identifierSet -> {
Jonas Waeber's avatar
Jonas Waeber committed
209
                                    throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
210
                                            "The row ${row.rowNum} contains a duplicated identifier in column ${metadata.tableIdentifierIndex} with another row."
Jonas Waeber's avatar
Jonas Waeber committed
211
212
                                    )
                                }
213
214
215
216
                                else -> {
                                    identifierSet.add(cellValue)
                                    cellValue
                                }
Jonas Waeber's avatar
Jonas Waeber committed
217
                            }
218
219
                        } else {
                            throw InvalidInputException(
Jonas Waeber's avatar
Jonas Waeber committed
220
                                    "No cell found in row ${row.rowNum} for column ${metadata.tableIdentifierIndex}."
Jonas Waeber's avatar
Jonas Waeber committed
221
222
                            )
                        }
223
224
225
                    }
                } catch (ex: InvalidInputException) {
                    return@map ResultMessage(
226
227
                            key, null, Report(
                            key,
Jonas Waeber's avatar
Jonas Waeber committed
228
229
230
231
                            ReportStatus.fatal,
                            ex.localizedMessage,
                            Service.name
                    )
232
233
234
235
236
                    )
                }

                val jsonObject = json {
                    obj(
Jonas Waeber's avatar
Jonas Waeber committed
237
238
239
240
                            zip(
                                    propertiesList,
                                    retrieveCells(row, propertiesList.size - 1)
                            )
241
242
243
                    )
                }
                ResultMessage(
Jonas Waeber's avatar
Jonas Waeber committed
244
                        rowIdentifier, jsonObject, Report(
245
246
                        rowIdentifier,
                        ReportStatus.success,
Jonas Waeber's avatar
Jonas Waeber committed
247
248
249
                        "",
                        Service.name
                )
250
                )
Jonas Waeber's avatar
Jonas Waeber committed
251
            }
252
253
            // Empty rows create a null result. These are removed.
        }.filterNotNull()
Jonas Waeber's avatar
Jonas Waeber committed
254
255
    }
}