Commit ebd4cfb7 authored by Jonas Waeber's avatar Jonas Waeber

Refactor input

parent dbf8fa11
apiVersion: v1
kind: ConfigMap
metadata:
name: "{{ .Values.processId }}-{{ .Values.jobName }}-config"
name: "{{ .Values.recordSetId }}-{{ .Values.shortSessionId }}-config"
namespace: memobase
data:
APP_DIRECTORY: "{{ .Values.appDirectory }}"
CLIENT_ID: "{{ .Values.processId }}-{{ .Values.jobName }}"
TOPIC_IN: "{{ .Values.processId }}-{{ .Values.jobName }}"
TOPIC_PROCESS: "{{ .Values.processId }}-reporting"
\ No newline at end of file
SESSION_ID: "{{ .Values.sessionId }}"
RECORD_SET_ID: "{{ .Values.recordSetId }}"
INSTITUTION_ID: "{{ .Values.institutionId }}"
IS_PUBLISHED: "{{ .Values.isPublished }}"
XML_RECORD_TAG: "{{.Values.xmlRecordTag }}"
XML_IDENTIIFER_FIELD_NAME: "{{ .Values.xmlIdentifierFieldName }}"
TABLE_SHEET_INDEX: "{{.Values.tableSheetIndex }}"
TABLE_HEADER_COUNT: "{{ .Values.tableHeaderCount }}"
TABLE_HEADER_INDEX: "{{ .Values.tableHeaderIndex }}"
TABLE_IDENTIFIER_INDEX: "{{ .Values.tableIdentifierIndex }}"
CLIENT_ID: "{{ .Values.recordSetId }}-{{ .Values.sessionId }}"
TOPIC_OUT: "{{.Values.topicName }}"
TOPIC_REPORTING: "{{ .Values.reportingTopicName }}"
\ No newline at end of file
apiVersion: batch/v1
kind: Job
metadata:
name: "{{ .Values.processId }}-{{ .Values.jobName }}"
name: "{{ .Values.reportingTopicName }}-{{ .Values.shortSessionId }}"
namespace: memobase
labels:
institutionId: "{{ .Values.institutionId }}"
recordSetId: "{{ .Values.recordSetId }}"
jobType: "import-job"
jobType: "text-file-validation"
spec:
template:
spec:
containers:
- name: "{{ .Values.processId }}-{{ .Values.jobName }}"
- name: "{{ .Values.recordSetId }}-{{ .Values.sessionId }}"
image: "{{ .Values.registry }}/{{ .Values.image }}:{{ .Values.tag }}"
envFrom:
- secretRef:
......@@ -19,6 +19,6 @@ spec:
- configMapRef:
name: "{{ .Values.kafkaConfigs }}"
- configMapRef:
name: "{{ .Values.processId }}-{{ .Values.jobName }}-config"
name: "{{ .Values.recordSetId }}-{{ .Values.shortSessionId }}-config"
restartPolicy: Never
backoffLimit: 0
\ No newline at end of file
......@@ -9,20 +9,27 @@ tag: "latest"
kafkaConfigs: prod-kafka-bootstrap-servers
sftpConfigs: internal-sftp-config
topicName: import-process-data-transform
reportingTopicName: import-process-reporting
############################################
## Values below should be defined via the #
## User Interface (Drupal) #
############################################
jobName: text-file-validation
processId: p0001
###
# API Configs (Mandatory!)
###
sessionId: placeholder
shortSessionId: placeholder
institutionId: placeholder
recordSetId: placeholder
## Needs to be set to the directory on the sftp server.
## this is a relative path built like this:
## "./{RECORD_SET_ID}"
## The exact structure will be defined in task MEMO-196
appDirectory: placeholderValue
\ No newline at end of file
isPublished: false
# Step 2
# xml-data-transform
xmlRecordTag: record
xmlIdentifierFieldName: id
# table-data-transform
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
......@@ -28,6 +28,7 @@ class App {
try {
val service = Service()
service.run()
exitProcess(0)
} catch (ex: Exception) {
ex.printStackTrace()
log.error("Stopping application due to error: " + ex.message)
......
/*
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import com.beust.klaxon.Klaxon
/**
 * Job-level report: an identifier, a status string and the
 * total / success / failure counters of a run.
 */
data class ProcessReport(
    val id: String,
    val status: String,
    val total: Int,
    val successes: Int,
    val failures: Int
) {
    /** Renders this report as a JSON string via Klaxon. */
    fun toJson(): String = Klaxon().toJsonString(this)
}
......@@ -21,23 +21,31 @@ import java.io.Closeable
import java.util.Properties
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.header.Header
import org.apache.kafka.common.header.internals.RecordHeader
class Producer(props: Properties, private val topic: String) : Closeable {
class Producer(
props: Properties,
headers: Properties,
private val outputTopic: String,
private val reportingTopic: String
) : Closeable {
private val instance = KafkaProducer<String, String>(props)
private val reportingTopic = "$topic-reporting"
private val headers = transformHeaders(headers)
/**
 * Turns every entry of [headers] into a Kafka [RecordHeader].
 *
 * Keys and values are cast to [String]; an entry of any other type fails with a
 * [ClassCastException]. Values are encoded with `String.toByteArray()` (UTF-8 by default).
 */
private fun transformHeaders(headers: Properties): List<Header> {
    val converted = mutableListOf<Header>()
    for ((name, value) in headers) {
        converted.add(RecordHeader(name as String, (value as String).toByteArray()))
    }
    return converted
}
fun sendMessage(key: String, message: Message) {
instance.send(ProducerRecord(topic, key, message.toJson()))
instance.send(ProducerRecord(outputTopic, null, key, message.toJson(), headers))
}
/** Sends [report] as JSON to the reporting topic, using the report id as the record key. */
fun sendReport(report: Report) {
    val record = ProducerRecord(reportingTopic, report.id, report.toJson())
    instance.send(record)
}
/** Sends the job-level [report] as JSON to the given [topic], keyed by the report id. */
fun sendJobReport(report: ProcessReport, topic: String) {
    val record = ProducerRecord(topic, report.id, report.toJson())
    instance.send(record)
}
/** Closes the wrapped Kafka producer. */
override fun close() = instance.close()
......
......@@ -19,13 +19,38 @@
package org.memobase
import com.beust.klaxon.Klaxon
import java.time.LocalDateTime
/**
 * Status report consisting of an id, a step name, a timestamp, a status and a message.
 *
 * [step] defaults to "text-file-validation" and [timestamp] to the string form of
 * `LocalDateTime.now()` at construction time.
 *
 * [equals] and [hashCode] look only at [id], [step], [status] and [message]; the
 * [timestamp] is left out, so two reports created at different moments with otherwise
 * identical content compare equal.
 */
data class Report(
    val id: String,
    val step: String = "text-file-validation",
    val timestamp: String = LocalDateTime.now().toString(),
    val status: String,
    val message: String
) {
    /** Renders this report as a JSON string via Klaxon. */
    fun toJson(): String = Klaxon().toJsonString(this)

    /** Field-wise comparison of everything except [timestamp]; requires the exact same class. */
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other == null || javaClass != other.javaClass) return false
        other as Report
        return id == other.id &&
            step == other.step &&
            status == other.status &&
            message == other.message
    }

    /** Hash over the same fields used by [equals], combined with the usual factor 31. */
    override fun hashCode(): Int =
        listOf(step, status, message).fold(id.hashCode()) { acc, field -> 31 * acc + field.hashCode() }
}
......@@ -31,14 +31,26 @@ class Service(fileName: String = "app.yml") {
val settings =
SettingsLoader(
listOf("directory"),
listOf(
"sessionId",
"recordSetId",
"institutionId",
"isPublished",
"xmlRecordTag",
"xmlIdentifierFieldName",
"tableSheetIndex",
"tableHeaderCount",
"tableHeaderIndex",
"tableIdentifierIndex"
),
fileName,
useProducerConfig = true,
readSftpSettings = true
)
private val producer = Producer(settings.kafkaProducerSettings, settings.outputTopic)
private val directory = settings.appSettings.getProperty("directory")
private val producer = Producer(settings.kafkaProducerSettings, settings.appSettings, settings.outputTopic, settings.processReportTopic)
private val recordSetId = settings.appSettings.getProperty("recordSetId")
private val sessionId = settings.appSettings.getProperty("sessionId")
private val validator = FileValidation()
private val sftpClient = SftpClient(settings.sftpSettings)
......@@ -50,34 +62,32 @@ class Service(fileName: String = "app.yml") {
sftpClient.use { sftp ->
log.info("Connected to SFTP & Kafka.")
val files = try {
val fileList = sftp.listFiles(directory).map { File(it) }
val fileList = sftp.listFiles(recordSetId).map { File(it) }
totalCount = fileList.size
fileList
} catch (ex: SftpClientException) {
ex.printStackTrace()
log.error("SFTP Exception: Could not compile the file list on sftp server in directory: $directory.")
val report = ProcessReport(
"text-file-validation",
log.error("SFTP Exception: Could not compile the file list on sftp server in directory: './$recordSetId/'.")
val report = Report(
"$recordSetId#$sessionId",
status = ReportStatus.failure,
total = 0,
successes = 0,
failures = 0
message = "SFTP Exception: ${ex.localizedMessage}"
)
producer.sendJobReport(report, settings.processReportTopic)
producer.sendReport(report)
exitProcess(1)
}
log.info("Retrieved file list from sftp server at path: $directory")
log.info("Retrieved file list from sftp server from folder: $recordSetId")
val reports = mutableListOf<Report>()
try {
log.info("There are a total of ${files.size} files to validate.")
log.info("Total files: ${files.size}.")
for (file in files) {
log.info("Validate file $file.")
log.info("Begin Validation: $file.")
val format = validator.validateExtension(file)
try {
val remoteFile = sftp.open(file)
remoteFile.use {
val validationResult = validator.validate(it.RemoteFileInputStream(), format, file)
log.info("Validated file at path ${validationResult.first}")
log.info("Validation ${validationResult.second.status}.")
producer.sendMessage(validationResult.second.id, validationResult.first)
producer.sendReport(validationResult.second)
reports.add(validationResult.second)
......@@ -95,56 +105,28 @@ class Service(fileName: String = "app.yml") {
}
}
log.info("Collected a total of ${reports.size} reports.")
val failures = reports.count { report -> report.status == ReportStatus.failure }
if (failures > 0) {
log.warn("Validation ended with $failures failures!")
producer.sendJobReport(
ProcessReport(
"text-file-validation",
status = ReportStatus.failure,
total = totalCount,
failures = failures,
successes = totalCount - failures
),
settings.processReportTopic
)
} else {
log.info("Validation was successful!")
producer.sendJobReport(
ProcessReport(
"text-file-validation",
status = ReportStatus.success,
total = totalCount,
successes = reports.size,
failures = 0
),
settings.processReportTopic
)
}
} catch (ex: SftpClientException) {
ex.printStackTrace()
log.error("SFTP Exception: ${ex.localizedMessage}.")
val report = ProcessReport(
"text-file-validation",
status = ReportStatus.failure,
total = totalCount,
failures = reports.count { report -> report.status == ReportStatus.failure },
successes = reports.count { report -> report.status == ReportStatus.success }
producer.sendReport(
Report(
"$recordSetId#$sessionId",
status = ReportStatus.failure,
message = "SFTP Exception: ${ex.localizedMessage}."
)
)
producer.sendJobReport(report, settings.processReportTopic)
exitProcess(1)
} catch (ex: Exception) {
ex.printStackTrace()
log.error(ex.javaClass.canonicalName + ": " + ex.localizedMessage)
producer.sendJobReport(
ProcessReport(
"text-file-validation",
log.error("${ex.javaClass.canonicalName}: ${ex.localizedMessage}.")
producer.sendReport(
Report(
"$recordSetId#$sessionId",
status = ReportStatus.failure,
total = totalCount,
failures = reports.count { report -> report.status == ReportStatus.failure },
successes = reports.count { report -> report.status == ReportStatus.success }
),
settings.processReportTopic
message = "Unknown Exception: ${ex.localizedMessage}."
)
)
exitProcess(1)
}
}
}
......
......@@ -4,11 +4,20 @@ sftp:
user: ${SFTP_USER:?env}
password: ${SFTP_PASSWORD:?env}
app:
directory: ${APP_DIRECTORY:?env}
sessionId: ${SESSION_ID:?env}
recordSetId: ${RECORD_SET_ID:?env}
institutionId: ${INSTITUTION_ID:?env}
isPublished: ${IS_PUBLISHED:?env}
xmlRecordTag: ${XML_RECORD_TAG:?env}
xmlIdentifierFieldName: ${XML_IDENTIIFER_FIELD_NAME:?env}
tableSheetIndex: ${TABLE_SHEET_INDEX:?env}
tableHeaderCount: ${TABLE_HEADER_COUNT:?env}
tableHeaderIndex: ${TABLE_HEADER_INDEX:?env}
tableIdentifierIndex: ${TABLE_IDENTIFIER_INDEX:?env}
kafka:
producer:
bootstrap.servers: ${KAFKA_BOOTSTRAP_SERVERS:?env}
client.id: ${CLIENT_ID:?env}
topic:
out: ${TOPIC_IN:?env}
process: ${TOPIC_PROCESS:?env}
\ No newline at end of file
out: ${TOPIC_OUT:?env}
process: ${TOPIC_REPORTING:?env}
\ No newline at end of file
......@@ -22,6 +22,5 @@ data class TestParams(
val configFile: String,
val expectedKey: String,
val expectedValue: String,
val expectedReportValue: Report,
val expectedProcessReport: ProcessReport
val expectedReportValue: Report
)
This diff is collapsed.
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_1
sessionId: session1
recordSetId: /testset1
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_2
sessionId: session1
recordSetId: /testset2
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_3
sessionId: session1
recordSetId: /testset3
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_4
sessionId: session1
recordSetId: /testset4
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_5
sessionId: session1
recordSetId: /testset5
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_6
sessionId: session1
recordSetId: /testset6
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_7
sessionId: session1
recordSetId: /testset7
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
......@@ -4,11 +4,20 @@ sftp:
user: user
password: password
app:
directory: /memobase/test_record_set_8
sessionId: session1
recordSetId: /testset8
institutionId: mrv
isPublished: false
xmlRecordTag: record
xmlIdentifierFieldName: id
tableSheetIndex: 1
tableHeaderCount: 1
tableHeaderIndex: 1
tableIdentifierIndex: 1
kafka:
producer:
bootstrap.servers: localhost:12345
client.id: sftp-reader-p1-j1
client.id: test-file-validation-client
topic:
out: sftp-reader-p1-j1
process: p1-reporting
\ No newline at end of file
out: import-process-data-transform
process: import-process-reporting
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment