Commit 27726a51 authored by Jonas Waeber

Implement process reports

Refactors tests and moves parser functionality out of KafkaTopology.kt.
parent 6e1f839c
Pipeline #12668 passed with stages
in 5 minutes and 25 seconds
......@@ -11,7 +11,7 @@ test:
tags:
- mbr
script:
- gradle --no-daemon --no-scan --no-build-cache test --fail-fast --tests "org.memobase.Tests"
- gradle --no-daemon --no-scan --no-build-cache test --fail-fast
.build-image:
......
......@@ -24,7 +24,8 @@ import org.apache.logging.log4j.LogManager
class App {
companion object {
private val log = LogManager.getLogger("TableDataTransformApp")
@JvmStatic fun main(args: Array<String>) {
@JvmStatic
fun main(args: Array<String>) {
try {
Service().run()
} catch (ex: Exception) {
......
This diff is collapsed.
......@@ -26,15 +26,15 @@ class Service(file: String = "app.yml") {
private val log = LogManager.getLogger("TableDataService")
val settings = SettingsLoader(
listOf(
"sheet",
"header.count",
"header.line",
"identifier"
),
file,
useStreamsConfig = true,
readSftpSettings = true
listOf(
"sheet",
"header.count",
"header.line",
"identifier"
),
file,
useStreamsConfig = true,
readSftpSettings = true
)
val topology = KafkaTopology(settings).build()
......
This diff is collapsed.
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import org.apache.poi.ss.usermodel.Cell
import org.apache.poi.ss.usermodel.CellType
import org.apache.poi.ss.usermodel.Row
/**
 * Helper functions for reading spreadsheet rows as strings and for pairing
 * header names with the values of a line.
 */
object UtilityFunctions {
    /**
     * Retrieves cells from a row of excel. Restricts the size to the actually
     * used part of sheet as otherwise the row is many time larger with many empty cells.
     *
     * The cells at the indices `0..size` (both ends inclusive) are read, so the
     * returned list holds `size + 1` entries.
     * NOTE(review): the inclusive upper bound only matches the description of [size]
     * if callers pass the index of the last used column - confirm against the call site.
     *
     * @param row: The row from which to retrieve cells.
     * @param size: The number of cells active in the sheet.
     *
     * @return A list of the cell values as strings.
     */
    fun retrieveCells(row: Row, size: Int): List<String> =
        (0..size).map { index -> retrieveCellValue(row.getCell(index)) }

    /**
     * Checks a cell and returns the content as string. If no valid value is found an empty
     * string is returned.
     *
     * In case of a numeric value, a number of one or more is converted to a whole number
     * (the fractional part is cut off), a number below one is rendered as the local time
     * of the cell's date-time value.
     *
     * @param cell: A potential cell.
     * @return Content of the cell as a string.
     */
    fun retrieveCellValue(cell: Cell?): String {
        if (cell == null) return ""
        return when (cell.cellType) {
            CellType.BOOLEAN -> cell.booleanCellValue.toString()
            CellType.NUMERIC -> {
                val number = cell.numericCellValue
                if (number >= 1) {
                    number.toLong().toString()
                } else {
                    cell.localDateTimeCellValue.toLocalTime().toString()
                }
            }
            CellType.STRING -> cell.stringCellValue
            // Blank, untyped, formula and error cells carry no usable text.
            CellType.BLANK, CellType._NONE, CellType.FORMULA, CellType.ERROR -> ""
            else -> ""
        }
    }

    /**
     * Creates pairs from header + line values in the same column.
     *
     * Columns whose value is the empty string are left out. A line that is shorter
     * than the header is accepted: the header columns without a value are left out
     * as well instead of raising an [IndexOutOfBoundsException]. Values are trimmed
     * after the emptiness check, so a value made only of whitespace yields a pair
     * with an empty string.
     *
     * @param header: A list of all properties
     * @param line: The content of the current line.
     * @return The (property, trimmed value) pairs in header order.
     */
    fun zip(header: List<String>, line: List<String>): List<Pair<String, String>> =
        header.mapIndexedNotNull { index, property ->
            line.getOrNull(index)
                ?.takeIf { it.isNotEmpty() }
                ?.let { value -> property to value.trim() }
        }
}
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
import com.beust.klaxon.json
/**
 * Provides the JSON object that stands for an error result.
 */
object ErrorResult {
    /**
     * Evaluates the Klaxon `json` builder and returns an object with the single
     * entry `message`, whose value is [Formats.error].
     */
    fun get() = json {
        obj("message" to Formats.error)
    }
}
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
/**
 * Upper-case string constants naming the table formats, together with the
 * `INVALID` and `ERROR` markers.
 */
object Formats {
    const val csv: String = "CSV"
    const val tsv: String = "TSV"
    const val xlsx: String = "XLSX"
    const val xls: String = "XLS"
    const val ods: String = "ODS"
    const val invalid: String = "INVALID"
    const val error: String = "ERROR"
}
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
import com.beust.klaxon.Klaxon
/**
 * Message made up of a [format] string and a [path] string.
 */
data class Message(
    val format: String,
    val path: String
) {
    /**
     * Serialises this message to a JSON string with a newly created [Klaxon] instance.
     */
    fun toJson(): String = Klaxon().toJsonString(this)
}
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
import com.beust.klaxon.JsonObject
/**
 * Result of a parser run: the list of [messages] together with one [processReport].
 */
data class ParserResult(
    val messages: List<ResultMessage>,
    val processReport: ProcessReport
) {
    /**
     * Convenience constructor for a result with exactly one message, which is
     * built from [key], [jsonObject] and [report].
     */
    constructor(key: String, jsonObject: JsonObject, report: Report, processReport: ProcessReport) : this(
        messages = listOf(ResultMessage(key = key, value = jsonObject, report = report)),
        processReport = processReport
    )
}
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
import com.beust.klaxon.Klaxon
/**
 * Summary report with a [status], the [total] count and the split of that
 * count into [successes] and [failures]. [id] defaults to `table-data-transform`.
 */
data class ProcessReport(
    val id: String = "table-data-transform",
    val status: String,
    val total: Int,
    val successes: Int,
    val failures: Int
) {
    /**
     * Derives the counters from [status]: the whole [total] is booked as successes
     * when the status equals [ReportStatus.success], as failures when it equals
     * [ReportStatus.failure]; for any other status both counters are zero.
     */
    constructor(status: String, total: Int) : this(
        id = "table-data-transform",
        status = status,
        total = total,
        successes = if (status == ReportStatus.success) total else 0,
        failures = if (status == ReportStatus.failure) total else 0
    )

    /**
     * Serialises this report to a JSON string with a newly created [Klaxon] instance.
     */
    fun toJson(): String = Klaxon().toJsonString(this)
}
/*
* sftp-reader
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
......@@ -16,25 +16,16 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
package org.memobase.models
import com.beust.klaxon.Klaxon
data class Report(
val id: String,
val status: String,
val message: String
val id: String,
val status: String,
val message: String
) {
override fun equals(other: Any?): Boolean {
return when (other) {
null -> false
!is Report -> false
else -> hashCode() == other.hashCode()
}
}
override fun hashCode(): Int {
var result = id.hashCode()
result = 31 * result + status.hashCode()
result = 31 * result + message.hashCode()
return result
fun toJson(): String {
return Klaxon().toJsonString(this)
}
}
/*
* text-file-validation
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
......@@ -16,52 +16,13 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import com.beust.klaxon.json
/**
 * Upper-case string constants naming the table formats, together with the
 * `INVALID` and `ERROR` markers.
 */
object Formats {
    const val csv: String = "CSV"
    const val tsv: String = "TSV"
    const val xlsx: String = "XLSX"
    const val xls: String = "XLS"
    const val ods: String = "ODS"
    const val invalid: String = "INVALID"
    const val error: String = "ERROR"
}
/**
 * Lower-case string constants `csv`, `tsv`, `xlsx`, `xls` and `ods`.
 * NOTE(review): presumably file-name extensions without the leading dot - confirm at the call site.
 */
object Extensions {
    const val csv: String = "csv"
    const val tsv: String = "tsv"
    const val xlsx: String = "xlsx"
    const val xls: String = "xls"
    const val ods: String = "ods"
}
/**
 * String constants for the two report status values.
 */
object ReportStatus {
    const val success: String = "SUCCESS"
    const val failure: String = "FAILURE"
}
/**
 * Holds the JSON object that stands for an error result.
 */
object ErrorResult {
    /** JSON object with the single entry `message`, whose value is [Formats.error]. */
    val result = json {
        obj("message" to Formats.error)
    }
}
package org.memobase.models
object ReportMessages {
/**
 * Builds the message for a file that could not be processed.
 *
 * @param fileName name of the file, inserted after "Could not process file".
 * @param message reason text, inserted after "because".
 * @return the assembled message.
 */
fun processFailure(fileName: String, message: String): String =
    "Could not process file $fileName, because $message"
/**
 * Builds the message for a completed transformation.
 *
 * @param count number of records that is put into the message.
 * @return the assembled message.
 */
fun processSuccess(count: Int): String =
    "Transformed table data into $count records."
/**
 * Builds the message for an invalid input file.
 *
 * @param fileName name of the file, placed at the end of the message.
 * @param message description of the problem, placed after the "Invalid Input Error:" prefix.
 * @return the assembled message.
 */
fun invalidFile(fileName: String, message: String): String =
    "Invalid Input Error: $message for file $fileName."
/**
 * Builds the message for a row that was transformed.
 *
 * @param identifier identifier that is put at the end of the message.
 * @param count number that is put after the word "row".
 * @return the assembled message.
 */
fun reportSuccess(identifier: String, count: Int): String =
    "Successfully transformed row $count into key-value map with identifier $identifier."
......@@ -69,4 +30,4 @@ object ReportMessages {
/**
 * Builds the message for a failure, prefixing the given text with "Invalid Input Error: ".
 *
 * @param message description of the problem.
 * @return the assembled message.
 */
fun reportFailure(message: String): String =
    "Invalid Input Error: $message"
}
}
\ No newline at end of file
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
/**
 * String constants for the two report status values.
 */
object ReportStatus {
    const val success: String = "SUCCESS"
    const val failure: String = "FAILURE"
}
/*
* Table Data Import Service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.models
import com.beust.klaxon.JsonObject
/**
 * One result entry that bundles a key, a JSON value and a report.
 *
 * @property key Key string of the entry.
 * @property value JSON object of the entry.
 * @property report Report that belongs to the entry.
 */
data class ResultMessage(
val key: String,
val value: JsonObject,
val report: Report
)
......@@ -17,6 +17,7 @@
*/
package org.memobase
import com.beust.klaxon.JsonObject
import com.beust.klaxon.Klaxon
import java.io.File
import java.io.FileInputStream
......@@ -30,12 +31,17 @@ import org.apache.kafka.streams.test.ConsumerRecordFactory
import org.apache.logging.log4j.LogManager
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import org.memobase.models.ProcessReport
import org.memobase.models.Report
import org.memobase.models.ReportStatus
import org.memobase.testing.EmbeddedSftpServer
import java.io.StringReader
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class Tests {
class TestIntegration {
private val log = LogManager.getLogger("TestLogger")
private val resourcePath = "src/test/resources/data"
......@@ -44,6 +50,7 @@ class Tests {
}
private val sftpServer = EmbeddedSftpServer(22000, "user", "password")
private val klaxon = Klaxon()
init {
val files = listOf(
......@@ -56,13 +63,16 @@ class Tests {
)
for (pair in files) {
sftpServer.putFile(Paths.get(pair.first, pair.second).toString(), FileInputStream(Paths.get("src/test/resources/sftp", pair.second).toFile()))
sftpServer.putFile(
Paths.get(pair.first, pair.second).toString(),
FileInputStream(Paths.get("src/test/resources/sftp", pair.second).toFile())
)
}
}
@ParameterizedTest
@MethodSource("testParams")
fun `test inputs`(params: TestParams) {
fun `test kafka integrations`(params: TestParams) {
val service = Service(params.settingsFileName)
val testDriver = TopologyTestDriver(service.topology, service.settings.kafkaStreamsSettings)
val factory = ConsumerRecordFactory(
......@@ -79,14 +89,15 @@ class Tests {
StringDeserializer(),
StringDeserializer()
)
assertThat(record).isNotNull
var count = 0
val keys = mutableListOf<String>()
val values = mutableListOf<JsonObject>()
val reports = mutableListOf<Report>()
while (record != null) {
count += 1
assertThat(record)
.isNotNull
.hasFieldOrPropertyWithValue("key", params.expectedOutputKey[count - 1])
.hasFieldOrPropertyWithValue("value", readFile(params.expectedOutputDirectoryName + "/$count.json"))
keys.add(record.key())
values.add(klaxon.parseJsonObject(StringReader(record.value())))
val reportedRecord = testDriver.readOutput(
"${service.settings.outputTopic}-reporting",
......@@ -94,16 +105,7 @@ class Tests {
StringDeserializer()
)
if (reportedRecord != null) {
val data = reportedRecord.value()
val report = Klaxon().parse<Report>(data)
assertThat(report)
.isNotNull
.isEqualTo(Klaxon().parse<Report>(readFile(params.expectedOutputDirectoryName + "/r$count.json")))
} else {
log.error("No report for record $record.")
}
klaxon.parse<Report>(reportedRecord.value())?.let { reports.add(it) }
record = testDriver.readOutput(
service.settings.outputTopic,
StringDeserializer(),
......@@ -111,15 +113,42 @@ class Tests {
)
}
val processReport = testDriver.readOutput(
val processReportRecord = testDriver.readOutput(
service.settings.processReportTopic,
StringDeserializer(),
StringDeserializer()
)
assertThat(processReport)
.isNotNull
.hasFieldOrPropertyWithValue("value", params.processReportOutput)
val processReport = klaxon.parse<ProcessReport>(processReportRecord.value())
val expectedOutputs = mutableListOf<JsonObject>()
for (i in 1..count)
expectedOutputs.add(klaxon.parseJsonObject(StringReader(readFile(params.expectedOutputDirectoryName + "/$i.json"))))
val expectedReports = mutableListOf<Report>()
for (i in 1..count)
klaxon.parse<Report>(StringReader(readFile(params.expectedOutputDirectoryName + "/r$i.json")))
?.let { expectedReports.add(it) }
assertAll(
"",
{
assertThat(keys)
.containsAll(params.expectedOutputKey)
},
{
assertThat(values)
.containsAll(expectedOutputs)
},
{
assertThat(reports)
.containsAll(expectedReports)
},
{
assertThat(processReport)
.isNotNull
.isEqualTo(params.processReportOutput)
}
)
}
private fun testParams() = Stream.of(
......@@ -130,13 +159,7 @@ class Tests {
"brandt_metadaten.csv",
listOf("brandt_metadaten.csv"),
"error_filter_output",
Klaxon().toJsonString(
Report(
"table-data-transform",
"FAILURE",
"Could not process file brandt_metadaten.csv, because the input file is invalid."
)
)
ProcessReport(ReportStatus.failure, 1)
),
TestParams(
"valid csv input",
......@@ -145,7 +168,7 @@ class Tests {
"brandt_metadaten.csv",
listOf("AVGR13716"),
"brandt_output",
Klaxon().toJsonString(Report("table-data-transform", "SUCCESS", "Transformed table data into 1 records."))
ProcessReport(ReportStatus.success, 1)
),
TestParams(
"invalid xlsx input",
......@@ -154,22 +177,16 @@ class Tests {
"excel_test_file.xlsx",
listOf("excel_test_file.xlsx"),
"excel_output",
Klaxon().toJsonString(
Report(
"table-data-transform",
"FAILURE",
"Could not process file excel_test_file.xlsx, because The property in cell J3 contains one or more invalid characters: [., :, /, +]."
)
)
ProcessReport(ReportStatus.failure, 1)
),
TestParams(
"valid xlsx input",
"test3.yml",
"excel_test_input2.json",
"excel_test_file2.xlsx",
listOf("AVGR13716", "AVGR13717", ""),
listOf("AVGR13716", "AVGR13717"),
"excel_output_valid",
Klaxon().toJsonString(Report("table-data-transform", "SUCCESS", "Transformed table data into 2 records."))
ProcessReport(ReportStatus.success, 2)
),
TestParams(
"test-numeric-cells",
......@@ -178,60 +195,7 @@ class Tests {
"test-numeric-cells.xls",
listOf("1000106888"),
"numeric_cell_output",
Klaxon().toJsonString(Report("table-data-transform", "SUCCESS", "Transformed table data into 1 records."))
)/*,
TestParams(
"valid csv input",
"test2.yml",
"baugazh_csv_import.json",
"mapping_baugazh.csv",
"MEI_49884",
"baugazh_output"
)*/
)
/*
@Test
fun `test create records`() {
val settingsLoader = SettingsLoader(
listOf(
"sheet",
"header.count",
"header.line",