Commit 0b61b153 authored by Jonas Waeber
Browse files

Update reporting.

parent 3bef121f
Pipeline #12318 passed with stages
in 5 minutes and 21 seconds
#Tue May 19 16:49:13 CEST 2020
distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip
distributionBase=GRADLE_USER_HOME
distributionPath=GRADLE_USER_HOME
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
......@@ -67,18 +67,17 @@ class KafkaTopology(private val settings: SettingsLoader) {
// report full process as failure
errorFilter[0]
.mapValues { key, _ ->
Klaxon().toJsonString(
Report(
.mapValues { _ ->
ProcessReport(
"xml-data-transform",
ReportStatus.failure,
ReportMessages.processFailure(key, "The input file is invalid.")
)
1,
0,
1
)
}
.to(settings.processReportTopic)
// TODO: Implement multi record xml documents splitter!
val transformedValue = errorFilter[1]
.mapValues { value -> sftpClient.open(File(value.path)) }
.map { key, value -> transformXml(key, value) }
......@@ -94,11 +93,30 @@ class KafkaTopology(private val settings: SettingsLoader) {
.to(settings.outputTopic)
stream
.mapValues { value -> value.report!!.toJson() }
.mapValues { value -> value.getReport().toJson() }
.to(reportingTopic)
stream
.mapValues { value -> value.report!!.toJson() }
.mapValues { value ->
val report = value.getReport()
if (report.status == ReportStatus.success) {
ProcessReport(
report.id,
ReportStatus.success,
1,
1,
0
)
} else {
ProcessReport(
report.id,
ReportStatus.failure,
1,
0,
1
)
}
}
.to(settings.processReportTopic)
}
......@@ -114,6 +132,4 @@ class KafkaTopology(private val settings: SettingsLoader) {
}
}
/*
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import com.beust.klaxon.Klaxon
/**
 * Summary of how a processing step went, expressed as counters.
 *
 * @property id identifier the report refers to.
 * @property status outcome label, passed in by the caller as a plain string.
 * @property total number of items covered by this report.
 * @property successes number of items counted as successful.
 * @property failures number of items counted as failed.
 */
data class ProcessReport(
    val id: String,
    val status: String,
    val total: Int,
    val successes: Int,
    val failures: Int
) {
    /** Serialises this report to a JSON string with Klaxon. */
    fun toJson(): String = Klaxon().toJsonString(this)
}
/*
* sftp-reader
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
......
......@@ -25,14 +25,26 @@ import org.xml.sax.ContentHandler
import org.xml.sax.Locator
import java.io.StringWriter
class SAXContentHandler(key: String, private val identifierFieldName: String, private val recordTag: String) : ContentHandler {
class SAXContentHandler(key: String, private val identifierFieldName: String, private val recordTag: String) :
ContentHandler {
private val log = LogManager.getLogger("SAXHandler")
val output = StringWriter()
var identifier: String = key
var report: Report? = null
private var report: Report? = null
private val jsonResult = JsonObject()
/**
 * Returns the report stored on this handler.
 *
 * When no report has been set, a failure report carrying the current
 * [identifier] and a generic "no report found" message is returned instead,
 * so callers never have to deal with a missing report.
 */
fun getReport(): Report =
    report ?: Report(
        identifier,
        ReportStatus.failure,
        "Unknown Failure: No report found."
    )
private var currentElementTag: String = ""
private var currentInnerElementTag: String = ""
private var currentElementContent: String = ""
......
/*
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import org.junit.jupiter.api.TestInstance
/** Integration test class; it currently declares no test cases. */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestIntegration
\ No newline at end of file
......@@ -24,5 +24,5 @@ data class TestParams(
val inputKey: String,
val outputKey: String,
val expectedOutputReport: Report,
val expectedOutputProcessReport: Report
val expectedOutputProcessReport: ProcessReport
)
......@@ -44,7 +44,7 @@ class TestXsltTransform {
}
@Test
fun `test old memobase xslt tranform`() {
fun `test old memobase xslt transform`() {
val props = Properties()
props.setProperty("xsltFilePath", "${resourcePath}/test_transform/test-transformer.xslt")
props.setProperty("identifierFieldName", "identifierMain")
......
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:fn="http://www.w3.org/2005/xpath-functions"
version="2.0"
xmlns:ns2="http://purl.org/dc/elements/1.1/"
xmlns:foxml="info:fedora/fedora-system:def/foxml#"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:audit="info:fedora/fedora-system:def/audit#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:fedora="info:fedora/fedora-system:def/relations-external#"
xmlns:fedora-model="info:fedora/fedora-system:def/model#"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xslt="http://www.w3.org/1999/XSL/Transform"
xmlns="urn:ebu:metadata-schema:ebuCore_2012"
xpath-default-namespace="urn:ebu:metadata-schema:ebuCore_2012">
<xsl:output
indent="yes"
method="xml"
/>
<!--<xsl:template match="/digitalObject/datastream/datastreamVersion/xmlContent/ebuCoreMain/coreMetadata">
<xsl:copy-of select="."/>
</xsl:template>-->
<!-- Wraps whatever the child nodes of coreMetadata produce in a single record element. -->
<xsl:template match="coreMetadata">
    <xsl:element name="record">
        <!--<xsl:apply-templates select="test"/>
        <xsl:apply-templates select="title, ns2:title, alternativeTitle, subject, description, format/essenceLocator, format/medium, type/*, references"/>-->
        <xsl:apply-templates select="node()"/>
    </xsl:element>
</xsl:template>
<!-- The element keeps its own local name; its content is the value of its child elements. -->
<xsl:template match="title | format/essenceLocator | format/duration | format/start | language | references | coverage/spatial/location">
    <xsl:element name="{local-name(.)}">
        <xsl:value-of select="*"/>
    </xsl:element>
</xsl:template>
<!-- The value of @typeLabel becomes the element name; the content is the value of the child elements. -->
<xsl:template match="description[@typeLabel] | alternativeTitle[@typeLabel] | subject[@typeLabel] | relation[@typeLabel]">
    <xsl:element name="{@typeLabel}">
        <xsl:value-of select="*"/>
    </xsl:element>
</xsl:template>
<!-- The element keeps its own local name; its content is the value of @typeLabel and/or @language. -->
<xsl:template match="format/medium | format/dataFormat/captioningFormat | type/genre">
    <xsl:element name="{local-name(.)}">
        <xsl:value-of select="@typeLabel | @language"/>
    </xsl:element>
</xsl:template>
<!--
  Normalises @typeLabel of type/objectType to a fixed label.
  Patterns are case-insensitive regular expressions and are tried in order;
  the first hit wins. A label that matches none of them yields an empty
  objectType element.
-->
<xsl:template match="type/objectType">
    <xsl:variable name="label" select="@typeLabel"/>
    <xsl:element name="objectType">
        <xsl:choose>
            <xsl:when test="matches($label,'film','i')">Film</xsl:when>
            <xsl:when test="matches($label,'photograph|foto','i')">Foto</xsl:when>
            <xsl:when test="matches($label,'radio','i')">Radio</xsl:when>
            <xsl:when test="matches($label,'television|tv','i')">TV</xsl:when>
            <xsl:when test="matches($label,'ton|sound','i')">Ton</xsl:when>
            <xsl:when test="matches($label,'video','i')">Video</xsl:when>
        </xsl:choose>
    </xsl:element>
</xsl:template>
<!-- Emits identifier<typeLabel> (for example identifierMain) holding the value of the ns2:identifier child. -->
<xsl:template match="identifier">
    <xsl:element name="{concat('identifier', @typeLabel)}">
        <xsl:value-of select="ns2:identifier"/>
    </xsl:element>
</xsl:template>
<!--
  Flattens the technical details of a video, image or audio format:
  * each technicalAttributeString becomes <formatName><typeLabel> with its own value,
  * width and height keep their name and get their @unit appended after a space,
  * each audioTrackConfiguration carries its @typeLabel as content.
-->
<xsl:template match="format/videoFormat | format/imageFormat | format/audioFormat">
    <!-- captured here because local-name() changes inside the loops below -->
    <xsl:variable name="formatName" select="local-name()"/>
    <xsl:for-each select="technicalAttributeString">
        <xsl:element name="{concat($formatName, @typeLabel)}">
            <xsl:value-of select="."/>
        </xsl:element>
    </xsl:for-each>
    <xsl:for-each select="width | height">
        <xsl:element name="{local-name()}">
            <xsl:value-of select="concat(., ' ', @unit)"/>
        </xsl:element>
    </xsl:for-each>
    <xsl:for-each select="audioTrackConfiguration">
        <xsl:element name="audioTrackConfiguration">
            <xsl:value-of select="@typeLabel"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
<!--
  Agents (contributor, creator, publisher).
  * role 'ResponsibleInstitution': nothing is written.
  * role 'Producer': a flat producerCorporateBodyName or producerPersonName element.
  * otherwise: <agent>CorporateBody or <agent>Person with a name child and,
    when a non-empty role label exists, a role child.
  Organisation details take precedence over contact details.
-->
<xsl:template match="contributor | creator | publisher">
    <xsl:variable name="agentKind" select="local-name()"/>
    <!-- empty role labels are treated as "no role" -->
    <xsl:variable name="agentRole" select="role/@typeLabel[. != '']"/>
    <xsl:choose>
        <xsl:when test="$agentRole = 'ResponsibleInstitution'"/>
        <xsl:when test="$agentRole = 'Producer'">
            <xsl:choose>
                <xsl:when test="organisationDetails">
                    <xsl:element name="producerCorporateBodyName">
                        <xsl:value-of select=".//organisationName"/>
                    </xsl:element>
                </xsl:when>
                <xsl:when test="contactDetails">
                    <xsl:element name="producerPersonName">
                        <xsl:value-of select=".//name"/>
                    </xsl:element>
                </xsl:when>
            </xsl:choose>
        </xsl:when>
        <xsl:when test="organisationDetails">
            <xsl:element name="{$agentKind}CorporateBody">
                <xsl:element name="name">
                    <xsl:value-of select=".//organisationName"/>
                </xsl:element>
                <xsl:if test="$agentRole">
                    <xsl:element name="role">
                        <xsl:value-of select="$agentRole"/>
                    </xsl:element>
                </xsl:if>
            </xsl:element>
        </xsl:when>
        <xsl:when test="contactDetails">
            <xsl:element name="{$agentKind}Person">
                <xsl:element name="name">
                    <xsl:value-of select=".//name"/>
                </xsl:element>
                <xsl:if test="$agentRole">
                    <xsl:element name="role">
                        <xsl:value-of select="$agentRole"/>
                    </xsl:element>
                </xsl:if>
            </xsl:element>
        </xsl:when>
    </xsl:choose>
</xsl:template>
<!--
  Access rights: scans the text of ns2:rights for known keywords and writes
  one accessPhysical / accessDigital element per keyword found.
  matches() does a substring (regex) search, so several keywords in one value
  each produce their own element.
-->
<xsl:template match="rights[@typeLabel='Access']/ns2:rights">
    <!-- 'noonsite' contains 'onsite': strip every 'noonsite' before looking for
         'onsite', otherwise a 'noonsite' value would also be reported as 'onsite'. -->
    <xsl:if test="matches(replace(.,'noonsite',''),'onsite')">
        <xsl:element name="accessPhysical">
            <xsl:text>onsite</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'noonsite')">
        <xsl:element name="accessPhysical">
            <xsl:text>noonsite</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'public')">
        <xsl:element name="accessDigital">
            <xsl:text>public</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'private')">
        <xsl:element name="accessDigital">
            <xsl:text>private</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'faro')">
        <xsl:element name="accessDigital">
            <xsl:text>faro</xsl:text>
        </xsl:element>
    </xsl:if>
</xsl:template>
<!-- The contact name of a 'Holder' rights entry is written as a flat rightsHolder element. -->
<xsl:template match="rights[@typeLabel='Holder']/rightsHolder/contactDetails/name">
    <xsl:element name="rightsHolder"><xsl:value-of select="."/></xsl:element>
</xsl:template>
<!-- ns2:source is re-emitted as a source element with the same text. -->
<xsl:template match="ns2:source">
    <xsl:element name="source"><xsl:value-of select="."/></xsl:element>
</xsl:template>
<!--
  Dates: picks the output element from the kind of child present
  (created: dateCreated, issued: dateIssued, PeriodOfTime: temporal)
  and lets the named template 'dates' format the value.
  For PeriodOfTime a periodName, when present, is used instead of the formatted dates.
-->
<xsl:template match="date | temporal">
    <xsl:choose>
        <xsl:when test="created">
            <xsl:element name="dateCreated">
                <xsl:call-template name="dates"/>
            </xsl:element>
        </xsl:when>
        <xsl:when test="issued">
            <xsl:element name="dateIssued">
                <xsl:call-template name="dates"/>
            </xsl:element>
        </xsl:when>
        <xsl:when test="PeriodOfTime">
            <xsl:element name="temporal">
                <xsl:choose>
                    <xsl:when test=".//periodName">
                        <xsl:value-of select=".//periodName"/>
                    </xsl:when>
                    <xsl:otherwise>
                        <xsl:call-template name="dates"/>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:element>
        </xsl:when>
    </xsl:choose>
</xsl:template>
<!--
  Named template 'dates': formats the date attributes of every child element
  of the context node as text.
  Output shapes: a single start value, 'start/end' for ranges, '?/end' when
  only an end is known, 'startDateTstartTime' when a start time exists, and an
  optional period text placed in front, separated by a space.
  The branches below are tried in order and only the first match is used;
  attribute combinations without a matching branch produce no output.
-->
<xsl:template name="dates">
<xsl:for-each select="child::*">
<!-- the [. != ''] predicate makes an empty attribute behave like a missing one -->
<xsl:variable name="startDate" select="@startDate[. != '']"/>
<xsl:variable name="startYear" select="@startYear[. != '']"/>
<xsl:variable name="startTime" select="@startTime[. != '']"/>
<xsl:variable name="endDate" select="@endDate[. != '']"/>
<xsl:variable name="endYear" select="@endYear[. != '']"/>
<xsl:variable name="period" select="@period[. != '']"/>
<xsl:choose>
<!-- Combinations which are not present in created/issued/temporal and are not processed:
* endYear and startDate
* period and endDate and startYear
* period and endYear and startDate
* startTime in another combination but with startDate
* period and endDate only
-->
<!-- start only -->
<xsl:when test="$startDate and not($endDate | $endYear | $period | $startTime)">
<xsl:value-of select="$startDate"/>
</xsl:when>
<xsl:when test="$startYear and not($endDate | $endYear | $period | $startTime)">
<xsl:value-of select="$startYear"/>
</xsl:when>
<!-- start and end: written as start/end -->
<xsl:when test="$startDate and $endDate and not($period | $startTime)">
<xsl:value-of select="concat($startDate, '/', $endDate)"/>
</xsl:when>
<xsl:when test="$startYear and $endDate and not($period | $startTime)">
<xsl:value-of select="concat($startYear, '/', $endDate)"/>
</xsl:when>
<xsl:when test="$startYear and $endYear and not($period | $startTime)">
<xsl:value-of select="concat($startYear, '/', $endYear)"/>
</xsl:when>
<!-- any remaining case with a start time: startDate, 'T', startTime (startDate may be empty here) -->
<xsl:when test="$startTime">
<xsl:value-of select="concat($startDate, 'T', $startTime)"/>
</xsl:when>
<!-- end only: the unknown start is written as '?' -->
<xsl:when test="$endDate and not ($startDate | $startYear | $period)">
<xsl:value-of select="concat('?/', $endDate)"/>
</xsl:when>
<xsl:when test="$endYear and not ($startDate | $startYear | $period)">
<xsl:value-of select="concat('?/', $endYear)"/>
</xsl:when>
<!-- period text, alone or in front of the date values -->
<xsl:when test="$period and not($endDate | $endYear | $startDate | $startYear)">
<xsl:value-of select="$period"/>
</xsl:when>
<xsl:when test="$period and $startDate and not ($endDate | $endYear)">
<xsl:value-of select="concat($period, ' ', $startDate)"/>
</xsl:when>
<xsl:when test="$period and $startYear and not ($endDate | $endYear)">
<xsl:value-of select="concat($period, ' ', $startYear)"/>
</xsl:when>
<xsl:when test="$period and $startDate and $endDate">
<xsl:value-of select="concat($period, ' ', $startDate, '/', $endDate)"/>
</xsl:when>
<xsl:when test="$period and $startYear and $endYear">
<xsl:choose>
<!-- the period text already contains 'startYear-endYear' (used as a regular expression), so it is not repeated -->
<xsl:when test="matches(@period,concat(@startYear, '-', @endYear))">
<xsl:value-of select="concat($startYear, '/', $endYear)"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="concat($period, ' ', $startYear, '/', $endYear)"/>
</xsl:otherwise>
</xsl:choose>
</xsl:when>
</xsl:choose>
</xsl:for-each>
</xsl:template>
<!-- Fields to drop: the empty template body writes nothing for the matched elements or their content. -->
<xsl:template match="isMemberOf | rights[@typeLabel='Usage']">
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0"?>
<!--
~ xml-data-transform
~ Copyright (C) 2020 Memoriav
~
~ This program is free software: you can redistribute it and/or modify
~ it under the terms of the GNU Affero General Public License as published by
~ the Free Software Foundation, either version 3 of the License, or
~ (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU Affero General Public License for more details.
~
~ You should have received a copy of the GNU Affero General Public License
~ along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- No templates are declared here, so only XSLT's built-in template rules apply to the input. -->
</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment