Commit dbf8fa11 authored by Jonas Waeber's avatar Jonas Waeber
Browse files

Refactored process reporting

parent 3c1e2574
Pipeline #12648 passed with stages
in 7 minutes and 5 seconds
## Text File Validation Service
[![pipeline status](https://gitlab.switch.ch/memoriav/memobase-2020/services/import-process/text-file-validation/badges/master/pipeline.svg)](https://gitlab.switch.ch/memoriav/memobase-2020/services/import-process/text-file-validation/-/commits/master)
Checks the files present in the `app.directory` folder on the SFTP server.
[Confluence documentation](https://memobase.atlassian.net/wiki/spaces/TBAS/pages/29196525/Service+Text+File+Validation)
......
......@@ -18,7 +18,13 @@
package org.memobase
import com.beust.klaxon.Klaxon
data class Message(
val format: String,
val path: String
)
) {
fun toJson(): String {
return Klaxon().toJsonString(this)
}
}
/*
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import com.beust.klaxon.Klaxon
/**
 * Aggregated report describing one complete processing run.
 *
 * @property id identifier of the reporting process.
 * @property status overall outcome of the run, as a plain string.
 * @property total number of items the run was expected to handle.
 * @property successes number of items that were handled successfully.
 * @property failures number of items that failed.
 */
data class ProcessReport(
    val id: String,
    val status: String,
    val total: Int,
    val successes: Int,
    val failures: Int
) {
    /**
     * Serializes this report with Klaxon.
     *
     * @return the JSON string produced by `Klaxon().toJsonString` for this instance.
     */
    fun toJson(): String = Klaxon().toJsonString(this)
}
......@@ -17,7 +17,6 @@
*/
package org.memobase
import com.beust.klaxon.Klaxon
import java.io.Closeable
import java.util.Properties
import org.apache.kafka.clients.producer.KafkaProducer
......@@ -28,15 +27,15 @@ class Producer(props: Properties, private val topic: String) : Closeable {
private val reportingTopic = "$topic-reporting"
fun sendMessage(key: String, message: Message) {
instance.send(ProducerRecord(topic, key, Klaxon().toJsonString(message)))
instance.send(ProducerRecord(topic, key, message.toJson()))
}
fun sendReport(report: Report) {
instance.send(ProducerRecord(reportingTopic, report.id, Klaxon().toJsonString(report)))
instance.send(ProducerRecord(reportingTopic, report.id, report.toJson()))
}
fun sendJobReport(report: Report, topic: String) {
instance.send(ProducerRecord(topic, report.id, Klaxon().toJsonString(report)))
fun sendJobReport(report: ProcessReport, topic: String) {
instance.send(ProducerRecord(topic, report.id, report.toJson()))
}
override fun close() {
......
......@@ -18,8 +18,14 @@
package org.memobase
import com.beust.klaxon.Klaxon
data class Report(
val id: String,
val status: String,
val message: String
)
) {
fun toJson(): String {
return Klaxon().toJsonString(this)
}
}
......@@ -19,6 +19,7 @@
package org.memobase
import java.io.File
import kotlin.system.exitProcess
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.Logger
import org.memobase.exceptions.SftpClientException
......@@ -37,18 +38,38 @@ class Service(fileName: String = "app.yml") {
)
private val producer = Producer(settings.kafkaProducerSettings, settings.outputTopic)
private val directory = settings.appSettings.getProperty("directory")
private val validator = FileValidation()
private val sftpClient = SftpClient(settings.sftpSettings)
private var totalCount = 0
fun run() {
producer.use { producer ->
sftpClient.use { sftp ->
log.info("Connected to SFTP & Kafka.")
val files = try {
val fileList = sftp.listFiles(directory).map { File(it) }
totalCount = fileList.size
fileList
} catch (ex: SftpClientException) {
ex.printStackTrace()
log.error("SFTP Exception: Could not compile the file list on sftp server in directory: $directory.")
val report = ProcessReport(
"text-file-validation",
status = ReportStatus.failure,
total = 0,
successes = 0,
failures = 0
)
producer.sendJobReport(report, settings.processReportTopic)
exitProcess(1)
}
log.info("Retrieved file list from sftp server at path: $directory")
val reports = mutableListOf<Report>()
try {
val validator = FileValidation()
val files = sftp.listFiles(settings.appSettings.getProperty("directory")).map { File(it) }
val reports = mutableListOf<Report>()
log.info("There are a total of ${files.size} files to validate ")
log.info("There are a total of ${files.size} files to validate.")
for (file in files) {
log.info("Validate file $file.")
val format = validator.validateExtension(file)
......@@ -78,20 +99,24 @@ class Service(fileName: String = "app.yml") {
if (failures > 0) {
log.warn("Validation ended with $failures failures!")
producer.sendJobReport(
Report(
ProcessReport(
"text-file-validation",
status = ReportStatus.failure,
message = ReportMessages.processFailure(failures, reports.size)
total = totalCount,
failures = failures,
successes = totalCount - failures
),
settings.processReportTopic
)
} else {
log.info("Validation was successful!")
producer.sendJobReport(
Report(
ProcessReport(
"text-file-validation",
status = ReportStatus.success,
message = ReportMessages.processSuccess(reports.size)
total = totalCount,
successes = reports.size,
failures = 0
),
settings.processReportTopic
)
......@@ -99,20 +124,24 @@ class Service(fileName: String = "app.yml") {
} catch (ex: SftpClientException) {
ex.printStackTrace()
log.error("SFTP Exception: ${ex.localizedMessage}.")
val report = Report(
val report = ProcessReport(
"text-file-validation",
status = ReportStatus.failure,
message = "SFTP Exception: ${ex.localizedMessage}."
total = totalCount,
failures = reports.count { report -> report.status == ReportStatus.failure },
successes = reports.count { report -> report.status == ReportStatus.success }
)
producer.sendJobReport(report, settings.processReportTopic)
} catch (ex: Exception) {
ex.printStackTrace()
log.error(ex.javaClass.canonicalName + ": " + ex.localizedMessage)
producer.sendJobReport(
Report(
ProcessReport(
"text-file-validation",
status = ReportStatus.failure,
message = "Unknown Exception: ${ex.javaClass.canonicalName}: ${ex.localizedMessage}."
total = totalCount,
failures = reports.count { report -> report.status == ReportStatus.failure },
successes = reports.count { report -> report.status == ReportStatus.success }
),
settings.processReportTopic
)
......
......@@ -22,6 +22,6 @@ data class TestParams(
val configFile: String,
val expectedKey: String,
val expectedValue: String,
val expectedReportValue: String,
val expectedProcessReport: Report
val expectedReportValue: Report,
val expectedProcessReport: ProcessReport
)
......@@ -17,7 +17,6 @@
*/
package org.memobase
import com.beust.klaxon.Klaxon
import java.io.FileInputStream
import java.nio.file.Paths
import java.time.Duration
......@@ -30,6 +29,7 @@ import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.logging.log4j.LogManager
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
......@@ -56,7 +56,10 @@ class Tests {
)
for (pair in files) {
sftpServer.putFile(Paths.get(pair.first, pair.second).toString(), FileInputStream(Paths.get("src/test/resources/data", pair.second).toFile()))
sftpServer.putFile(
Paths.get(pair.first, pair.second).toString(),
FileInputStream(Paths.get("src/test/resources/data", pair.second).toFile())
)
}
}
......@@ -95,20 +98,26 @@ class Tests {
result = consumer.poll(Duration.ofMillis(10))
}
assertThat(totalConsumerRecords.find { value -> value.topic() == topic })
.describedAs("Message Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedValue)
assertThat(totalConsumerRecords.find { value -> value.topic() == reportingTopic })
.describedAs("Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedReportValue)
assertThat(totalConsumerRecords.find { value -> value.topic() == processReportingTopic })
.describedAs("Process Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedProcessReport.id)
.hasFieldOrPropertyWithValue("value", Klaxon().toJsonString(params.expectedProcessReport))
assertAll("",
{
assertThat(totalConsumerRecords.find { value -> value.topic() == topic })
.describedAs("Message Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedValue)
},
{
assertThat(totalConsumerRecords.find { value -> value.topic() == reportingTopic })
.describedAs("Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedReportValue.toJson())
},
{
assertThat(totalConsumerRecords.find { value -> value.topic() == processReportingTopic })
.describedAs("Process Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedProcessReport.id)
.hasFieldOrPropertyWithValue("value", params.expectedProcessReport.toJson())
}
)
}
private fun directoryReaderTests() = Stream.of(
......@@ -116,136 +125,136 @@ class Tests {
"test1.yml",
expectedKey = "brandt.csv",
expectedValue = "{\"format\" : \"CSV\", \"path\" : \"/memobase/test_record_set_1/brandt.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "brandt.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_1/brandt.csv with format CSV."
)
expectedReportValue = Report(
id = "brandt.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_1/brandt.csv with format CSV."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "SUCCESS",
message = "Successfully validated 1 files."
status = ReportStatus.success,
total = 1,
failures = 0,
successes = 1
)
),
TestParams(
"test2.yml",
expectedKey = "bauGAZH_metadaten.csv",
expectedValue = "{\"format\" : \"CSV\", \"path\" : \"/memobase/test_record_set_2/bauGAZH_metadaten.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "bauGAZH_metadaten.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_2/bauGAZH_metadaten.csv with format CSV."
)
expectedReportValue = Report(
id = "bauGAZH_metadaten.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_2/bauGAZH_metadaten.csv with format CSV."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "SUCCESS",
message = "Successfully validated 1 files."
total = 1,
failures = 0,
successes = 1
)
),
TestParams(
"test3.yml",
expectedKey = "invalid.csv",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_record_set_3/invalid.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "invalid.csv",
status = "FAILURE",
message = "CSV ERROR: Fields num seems to be 4 on each row, but on 2th csv row, fields num is 6. for file /memobase/test_record_set_3/invalid.csv."
)
expectedReportValue = Report(
id = "invalid.csv",
status = "FAILURE",
message = "CSV ERROR: Fields num seems to be 4 on each row, but on 2th csv row, fields num is 6. for file /memobase/test_record_set_3/invalid.csv."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
total = 1,
failures = 1,
successes = 0
)
),
TestParams(
"test4.yml",
expectedKey = "file.txt",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_record_set_4/file.txt\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "file.txt",
status = "FAILURE",
message = "File Extension Error: Not a valid file extension: file.txt."
)
expectedReportValue = Report(
id = "file.txt",
status = "FAILURE",
message = "File Extension Error: Not a valid file extension: file.txt."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
total = 1,
failures = 1,
successes = 0
)
),
TestParams(
"test5.yml",
expectedKey = "20190906_Brandt_Metadaten.xlsx",
expectedValue = "{\"format\" : \"XLSX\", \"path\" : \"/memobase/test_record_set_5/20190906_Brandt_Metadaten.xlsx\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "20190906_Brandt_Metadaten.xlsx",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_5/20190906_Brandt_Metadaten.xlsx with format XLSX."
)
expectedReportValue = Report(
id = "20190906_Brandt_Metadaten.xlsx",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_5/20190906_Brandt_Metadaten.xlsx with format XLSX."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "SUCCESS",
message = "Successfully validated 1 files."
total = 1,
failures = 0,
successes = 1
)
),
TestParams(
"test6.yml",
expectedKey = "Export_Bilder_der_Arbeit_8.csv",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_record_set_6/Export_Bilder_der_Arbeit_8.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "Export_Bilder_der_Arbeit_8.csv",
status = "FAILURE",
message = "CSV ERROR: Fields num seems to be 1 on each row, but on 2th csv row, fields num is 5. for file /memobase/test_record_set_6/Export_Bilder_der_Arbeit_8.csv."
)
expectedReportValue = Report(
id = "Export_Bilder_der_Arbeit_8.csv",
status = "FAILURE",
message = "CSV ERROR: Fields num seems to be 1 on each row, but on 2th csv row, fields num is 5. for file /memobase/test_record_set_6/Export_Bilder_der_Arbeit_8.csv."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
total = 1,
failures = 1,
successes = 0
)
),
TestParams(
"test7.yml",
expectedKey = "valid_xml.xml",
expectedValue = "{\"format\" : \"XML\", \"path\" : \"/memobase/test_record_set_7/valid_xml.xml\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "valid_xml.xml",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_7/valid_xml.xml with format XML."
)
expectedReportValue = Report(
id = "valid_xml.xml",
status = "SUCCESS",
message = "Validated file at path /memobase/test_record_set_7/valid_xml.xml with format XML."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "SUCCESS",
message = "Successfully validated 1 files."
total = 1,
failures = 0,
successes = 1
)
),
TestParams(
"test8.yml",
expectedKey = "invalid.xml",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_record_set_8/invalid.xml\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "invalid.xml",
status = "FAILURE",
message = "XML ERROR: Element type \"foxml:objectProperties\" must be followed by either attribute specifications, \">\" or \"/>\". for file /memobase/test_record_set_8/invalid.xml."
)
expectedReportValue = Report(
id = "invalid.xml",
status = "FAILURE",
message = "XML ERROR: Element type \"foxml:objectProperties\" must be followed by either attribute specifications, \">\" or \"/>\". for file /memobase/test_record_set_8/invalid.xml."
),
expectedProcessReport = Report(
expectedProcessReport = ProcessReport(
id = "text-file-validation",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
total = 1,
failures = 1,
successes = 0
)
)
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment