Commit d79fb9ce authored by Jonas Waeber
Browse files

Adds feature to verify excel sheets

parent 828d8ba5
Pipeline #9229 passed with stages
in 6 minutes and 32 seconds
......@@ -49,6 +49,9 @@ dependencies {
implementation 'com.beust:klaxon:5.2'
// CSV Reader
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:0.7.3")
// XLSX / XLS Reader
implementation 'org.apache.poi:poi:4.1.2'
implementation 'org.apache.poi:poi-ooxml:4.1.2'
// KOTLIN IMPORTS
implementation 'org.jetbrains.kotlin:kotlin-stdlib-jdk8'
......
#Fri Apr 03 11:36:55 CEST 2020
distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-bin.zip
#Tue May 19 16:44:25 CEST 2020
distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip
distributionBase=GRADLE_USER_HOME
distributionPath=GRADLE_USER_HOME
zipStorePath=wrapper/dists
......
......@@ -21,16 +21,19 @@ package org.memobase
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
import com.github.doyaaaaaken.kotlincsv.util.MalformedCSVException
import java.io.File
import org.apache.poi.ss.usermodel.WorkbookFactory
import org.memobase.sftp.SftpClient
class FileValidation(private val sftp: SftpClient) {
private val supportedExtensions = mapOf(
Pair("csv", "CSV"),
Pair("tsv", "TSV")
Pair("csv", "CSV"),
Pair("tsv", "TSV"),
Pair("xslx", "XSLX"),
Pair("xsl", "XSL")
)
fun validate(file: File): Pair<Message, Report> {
when (val format = validateExtension(file)) {
return when (val format = validateExtension(file)) {
"CSV", "TSV" -> {
sftp.open(file).use {
val stream = it.RemoteFileInputStream()
......@@ -42,29 +45,58 @@ class FileValidation(private val sftp: SftpClient) {
escapeChar = '\\'
}.readAll(stream)
} catch (ex: MalformedCSVException) {
return Pair(
Message("ERROR", file.path),
Report(
id = file.name,
status = "FAILURE",
message = "$format ERROR: " + ex.localizedMessage
))
}
return Pair(
Message(format, file.path),
return@use Pair(
Message("ERROR", file.path),
Report(
id = file.name,
status = "SUCCESS",
message = "Validated file at path ${file.path} with format $format."))
id = file.name,
status = "FAILURE",
message = "$format ERROR: " + ex.localizedMessage
)
)
}
Pair(
Message(format, file.path),
Report(
id = file.name,
status = "SUCCESS",
message = "Validated file at path ${file.path} with format $format."
)
)
}
}
else -> return Pair(
Message("ERROR", file.path),
Report(
"XSLX", "XSL" -> {
sftp.open(file).use {
try {
val stream = it.RemoteFileInputStream()
WorkbookFactory.create(stream)
} catch (ex: Exception) {
return@use Pair(
Message("ERROR", file.path),
Report(
id = file.name,
status = "FAILURE",
message = "$format ERROR: ${ex.localizedMessage}"
)
)
}
Pair(
Message(format, file.path),
Report(
id = file.name,
status = "FAILURE",
message = "File Extension Error: Not a valid file extension: ${file.name}."
))
status = "SUCCESS",
message = "Validated file at path ${file.path} with format $format."
)
)
}
}
else -> Pair(
Message("ERROR", file.path),
Report(
id = file.name,
status = "FAILURE",
message = "File Extension Error: Not a valid file extension: ${file.name}."
)
)
}
}
......
......@@ -45,17 +45,33 @@ class Tests {
private val sftpServer = EmbeddedSftpServer(22000, "user", "password")
private val adminClient =
AdminClient.create(mapOf(Pair(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345")))
AdminClient.create(mapOf(Pair(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345")))
init {
sftpServer.createDirectories(
"/memobase/test_institution_1/test_record_set_1/",
"/memobase/test_institution_2/test_record_set_2/"
"/memobase/test_institution_1/test_record_set_1/",
"/memobase/test_institution_2/test_record_set_2/"
)
sftpServer.putFile(
"/memobase/test_institution_1/test_record_set_1/brandt.csv",
FileInputStream("src/test/resources/data/brandt.csv")
)
sftpServer.putFile(
"/memobase/test_institution_2/test_record_set_2/bauGAZH_metadaten.csv",
FileInputStream("src/test/resources/data/bauGAZH_metadaten.csv")
)
sftpServer.putFile(
"/memobase/test_institution_3/test_record_set_3/invalid.csv",
FileInputStream("src/test/resources/data/invalid.csv")
)
sftpServer.putFile(
"/memobase/test_institution_4/test_record_set_4/file.txt",
FileInputStream("src/test/resources/data/file.txt")
)
sftpServer.putFile(
"/memobase/test_institution_5/test_record_set_5/file.xslx",
FileInputStream("src/test/resources/data/20190906_Brandt_Metadaten.xlsx")
)
sftpServer.putFile("/memobase/test_institution_1/test_record_set_1/brandt.csv", FileInputStream("src/test/resources/data/brandt.csv"))
sftpServer.putFile("/memobase/test_institution_2/test_record_set_2/bauGAZH_metadaten.csv", FileInputStream("src/test/resources/data/bauGAZH_metadaten.csv"))
sftpServer.putFile("/memobase/test_institution_3/test_record_set_3/invalid.csv", FileInputStream("src/test/resources/data/invalid.csv"))
sftpServer.putFile("/memobase/test_institution_4/test_record_set_4/file.txt", FileInputStream("src/test/resources/data/file.txt"))
}
private val consumer: KafkaConsumer<String, String>
......@@ -94,77 +110,106 @@ class Tests {
}
assertThat(totalConsumerRecords.find { value -> value.topic() == topic })
.describedAs("Message Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedValue)
.describedAs("Message Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedValue)
assertThat(totalConsumerRecords.find { value -> value.topic() == reportingTopic })
.describedAs("Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedReportValue)
.describedAs("Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedKey)
.hasFieldOrPropertyWithValue("value", params.expectedReportValue)
assertThat(totalConsumerRecords.find { value -> value.topic() == processReportingTopic })
.describedAs("Process Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedProcessReport.id)
.hasFieldOrPropertyWithValue("value", Klaxon().toJsonString(params.expectedProcessReport))
.describedAs("Process Report Test")
.hasFieldOrPropertyWithValue("key", params.expectedProcessReport.id)
.hasFieldOrPropertyWithValue("value", Klaxon().toJsonString(params.expectedProcessReport))
}
private fun directoryReaderTests() = Stream.of(
TestParams(
"test1.yml",
expectedKey = "brandt.csv",
expectedValue = "{\"format\" : \"CSV\", \"path\" : \"/memobase/test_institution_1/test_record_set_1/brandt.csv\"}",
expectedReportValue = Klaxon().toJsonString(Report(
id = "brandt.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_institution_1/test_record_set_1/brandt.csv with format CSV.")),
expectedProcessReport = Report(
id = "jobXYZ",
status = "SUCCESS",
message = "Successfully validated 1 files."
)
TestParams(
"test1.yml",
expectedKey = "brandt.csv",
expectedValue = "{\"format\" : \"CSV\", \"path\" : \"/memobase/test_institution_1/test_record_set_1/brandt.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "brandt.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_institution_1/test_record_set_1/brandt.csv with format CSV."
)
),
TestParams(
"test2.yml",
expectedKey = "bauGAZH_metadaten.csv",
expectedValue = "{\"format\" : \"CSV\", \"path\" : \"/memobase/test_institution_2/test_record_set_2/bauGAZH_metadaten.csv\"}",
expectedReportValue = Klaxon().toJsonString(Report(
id = "bauGAZH_metadaten.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_institution_2/test_record_set_2/bauGAZH_metadaten.csv with format CSV.")),
expectedProcessReport = Report(
id = "jobXYZ",
status = "SUCCESS",
message = "Successfully validated 1 files."
expectedProcessReport = Report(
id = "jobXYZ",
status = "SUCCESS",
message = "Successfully validated 1 files."
)
),
TestParams(
"test2.yml",
expectedKey = "bauGAZH_metadaten.csv",
expectedValue = "{\"format\" : \"CSV\", \"path\" : \"/memobase/test_institution_2/test_record_set_2/bauGAZH_metadaten.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "bauGAZH_metadaten.csv",
status = "SUCCESS",
message = "Validated file at path /memobase/test_institution_2/test_record_set_2/bauGAZH_metadaten.csv with format CSV."
)
),
TestParams(
"test3.yml",
expectedKey = "invalid.csv",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_institution_3/test_record_set_3/invalid.csv\"}",
expectedReportValue = Klaxon().toJsonString(Report(
id = "invalid.csv",
status = "FAILURE",
message = "CSV ERROR: Fields num seems to be 5 on each row, but on 2th csv row, fields num is 7.")),
expectedProcessReport = Report(
id = "jobXYZ",
expectedProcessReport = Report(
id = "jobXYZ",
status = "SUCCESS",
message = "Successfully validated 1 files."
)
),
TestParams(
"test3.yml",
expectedKey = "invalid.csv",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_institution_3/test_record_set_3/invalid.csv\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "invalid.csv",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
message = "CSV ERROR: Fields num seems to be 5 on each row, but on 2th csv row, fields num is 7."
)
),
TestParams(
"test4.yml",
expectedKey = "file.txt",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_institution_4/test_record_set_4/file.txt\"}",
expectedReportValue = Klaxon().toJsonString(Report(
id = "file.txt",
status = "FAILURE",
message = "File Extension Error: Not a valid file extension: file.txt.")),
expectedProcessReport = Report(
id = "jobXYZ",
expectedProcessReport = Report(
id = "jobXYZ",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
)
),
TestParams(
"test4.yml",
expectedKey = "file.txt",
expectedValue = "{\"format\" : \"ERROR\", \"path\" : \"/memobase/test_institution_4/test_record_set_4/file.txt\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "file.txt",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
message = "File Extension Error: Not a valid file extension: file.txt."
)
),
expectedProcessReport = Report(
id = "jobXYZ",
status = "FAILURE",
message = "Failed to validate 1 of 1 files."
)
),
TestParams(
"test5.yml",
expectedKey = "file.xslx",
expectedValue = "{\"format\" : \"XSLX\", \"path\" : \"/memobase/test_institution_5/test_record_set_5/file.xslx\"}",
expectedReportValue = Klaxon().toJsonString(
Report(
id = "file.xslx",
status = "SUCCESS",
message = "Validated file at path /memobase/test_institution_5/test_record_set_5/file.xslx with format XSLX."
)
),
expectedProcessReport = Report(
id = "jobXYZ",
status = "SUCCESS",
message = "Successfully validated 1 files."
)
)
)
}
# Test job configuration — presumably test5.yml, the config name used by the spreadsheet test case; confirm.
# NOTE(review): nesting indentation appears to have been lost in this view; keys are shown flat.
# Job identifier; the test's expected process report carries this same id.
id: jobXYZ
# SFTP connection settings; they match the embedded SFTP server started by the tests
# (port 22000, user "user", password "password").
sftp:
host: localhost
port: 22000
user: user
password: password
app:
# Remote directory for this job — presumably the one that is scanned; the test fixture uploads file.xslx here.
directory: /memobase/test_institution_5/test_record_set_5
kafka:
producer:
# Same broker address the test's AdminClient connects to.
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
topic:
# Presumably the per-file output topic and the process-level reporting topic — confirm against the service.
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment