Commit a100263a authored by Jonas Waeber
Browse files

Fix excel parser

- Sheet index is now one-based: the configured index is offset by one before the sheet lookup.
- Filters out empty rows.
- Improves the error message for identifier errors.
- Properly transforms cell values.
parent 6b9cb0f3
......@@ -31,6 +31,7 @@ import org.apache.kafka.streams.Topology
import org.apache.kafka.streams.kstream.KStream
import org.apache.kafka.streams.kstream.Predicate
import org.apache.logging.log4j.LogManager
import org.apache.poi.ss.usermodel.CellType
import org.apache.poi.ss.usermodel.WorkbookFactory
import org.memobase.settings.SettingsLoader
import org.memobase.sftp.SftpClient
......@@ -282,9 +283,24 @@ class KafkaTopology(private val settings: SettingsLoader) {
WorkbookFactory.create(stream).use { workbook ->
val identifierSet = mutableSetOf<String>()
val propertiesList = mutableListOf<String>()
val sheet = workbook.getSheetAt(sheetIndex)
// sheet index is 0-based. This ensures that users can access sheet 1 with index 1!
val sheet = workbook.getSheetAt(sheetIndex - 1)
var count = 0
sheet.map { row ->
sheet.filter { row ->
row.any { cell ->
// filter all rows with no string, boolean or numeric cell
when (cell.cellType) {
CellType._NONE -> false
CellType.NUMERIC -> true
CellType.STRING -> true
CellType.FORMULA -> false
CellType.BLANK -> false
CellType.BOOLEAN -> true
CellType.ERROR -> false
else -> false
}
}
}.map { row ->
count += 1
if (count <= headerCount) {
if (count == propertyNamesIndex) {
......@@ -318,13 +334,13 @@ class KafkaTopology(private val settings: SettingsLoader) {
when (val cellValue = cell.stringCellValue) {
"" -> {
throw InvalidInputException(
"${row.rowNum}.$identifierIndex",
"CellAddress: $count:$identifierIndex",
"The row ${row.rowNum} has an empty identifier in column $identifierIndex."
)
}
in identifierSet -> {
throw InvalidInputException(
"${row.rowNum}.$identifierIndex",
"CellAddress: $count:$identifierIndex",
"The row ${row.rowNum} contains a duplicated identifier in column $identifierIndex with another row."
)
}
......@@ -350,7 +366,19 @@ class KafkaTopology(private val settings: SettingsLoader) {
obj(
zip(
propertiesList,
row.map { cell -> if (cell != null) cell.stringCellValue else "" })
row.map { cell ->
if (cell != null) {
when (cell.cellType) {
CellType.BLANK -> ""
CellType.BOOLEAN -> cell.booleanCellValue.toString()
CellType._NONE -> ""
CellType.NUMERIC -> cell.numericCellValue.toString()
CellType.STRING -> cell.stringCellValue
CellType.FORMULA -> ""
CellType.ERROR -> ""
else -> ""
}
} else "" })
)
}
Pair(
......@@ -358,7 +386,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
Report(
rowIdentifier,
ReportStatus.success,
ReportMessages.reportSuccess(rowIdentifier, row.rowNum)
ReportMessages.reportSuccess(rowIdentifier, count)
)
)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment