Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
Import Process
XML Data Transform
Commits
0b61b153
Commit
0b61b153
authored
Aug 05, 2020
by
Jonas Waeber
Browse files
Update reporting.
parent
3bef121f
Pipeline
#12318
passed with stages
in 5 minutes and 21 seconds
Changes
14
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
gradle/wrapper/gradle-wrapper.jar
0 → 100644
View file @
0b61b153
File added
gradle/wrapper/gradle-wrapper.properties
0 → 100644
View file @
0b61b153
#Tue May 19 16:49:13 CEST 2020
distributionUrl
=
https
\:
//services.gradle.org/distributions/gradle-6.3-all.zip
distributionBase
=
GRADLE_USER_HOME
distributionPath
=
GRADLE_USER_HOME
zipStorePath
=
wrapper/dists
zipStoreBase
=
GRADLE_USER_HOME
src/main/kotlin/KafkaTopology.kt
View file @
0b61b153
...
...
@@ -67,18 +67,17 @@ class KafkaTopology(private val settings: SettingsLoader) {
// report full process as failure
errorFilter
[
0
]
.
mapValues
{
key
,
_
->
Klaxon
().
toJsonString
(
Report
(
"xml-data-transform"
,
ReportStatus
.
failure
,
ReportMessages
.
processFailure
(
key
,
"The input file is invalid."
)
)
.
mapValues
{
_
->
ProcessReport
(
"xml-data-transform"
,
ReportStatus
.
failure
,
1
,
0
,
1
)
}
.
to
(
settings
.
processReportTopic
)
// TODO: Implement multi record xml documents splitter!
val
transformedValue
=
errorFilter
[
1
]
.
mapValues
{
value
->
sftpClient
.
open
(
File
(
value
.
path
))
}
.
map
{
key
,
value
->
transformXml
(
key
,
value
)
}
...
...
@@ -94,11 +93,30 @@ class KafkaTopology(private val settings: SettingsLoader) {
.
to
(
settings
.
outputTopic
)
stream
.
mapValues
{
value
->
value
.
r
eport
!!
.
toJson
()
}
.
mapValues
{
value
->
value
.
getR
eport
()
.
toJson
()
}
.
to
(
reportingTopic
)
stream
.
mapValues
{
value
->
value
.
report
!!
.
toJson
()
}
.
mapValues
{
value
->
val
report
=
value
.
getReport
()
if
(
report
.
status
==
ReportStatus
.
success
)
{
ProcessReport
(
report
.
id
,
ReportStatus
.
success
,
1
,
1
,
0
)
}
else
{
ProcessReport
(
report
.
id
,
ReportStatus
.
failure
,
1
,
0
,
1
)
}
}
.
to
(
settings
.
processReportTopic
)
}
...
...
@@ -114,6 +132,4 @@ class KafkaTopology(private val settings: SettingsLoader) {
}
}
src/main/kotlin/ProcessReport.kt
0 → 100644
View file @
0b61b153
/*
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package
org.memobase
import
com.beust.klaxon.Klaxon
/**
 * Aggregated report of a processing step that can be serialized to JSON.
 *
 * @property id identifier the report refers to.
 * @property status status label of the report.
 * @property total total count covered by this report.
 * @property successes count reported as successful.
 * @property failures count reported as failed.
 */
data class ProcessReport(
    val id: String,
    val status: String,
    val total: Int,
    val successes: Int,
    val failures: Int
) {
    /** Returns this report rendered as a JSON string by a new [Klaxon] instance. */
    fun toJson(): String = Klaxon().toJsonString(this)
}
src/main/kotlin/Report.kt
View file @
0b61b153
/*
*
sftp-reader
*
xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
...
...
src/main/kotlin/SAXContentHandler.kt
View file @
0b61b153
...
...
@@ -25,14 +25,26 @@ import org.xml.sax.ContentHandler
import
org.xml.sax.Locator
import
java.io.StringWriter
class
SAXContentHandler
(
key
:
String
,
private
val
identifierFieldName
:
String
,
private
val
recordTag
:
String
)
:
ContentHandler
{
class
SAXContentHandler
(
key
:
String
,
private
val
identifierFieldName
:
String
,
private
val
recordTag
:
String
)
:
ContentHandler
{
private
val
log
=
LogManager
.
getLogger
(
"SAXHandler"
)
val
output
=
StringWriter
()
var
identifier
:
String
=
key
var
report
:
Report
?
=
null
private
var
report
:
Report
?
=
null
private
val
jsonResult
=
JsonObject
()
/**
 * Returns the report stored in this handler, or a failure report for
 * [identifier] when no report has been stored.
 */
fun getReport(): Report =
    report ?: Report(identifier, ReportStatus.failure, "Unknown Failure: No report found.")
private
var
currentElementTag
:
String
=
""
private
var
currentInnerElementTag
:
String
=
""
private
var
currentElementContent
:
String
=
""
...
...
src/test/kotlin/TestIntegration.kt
0 → 100644
View file @
0b61b153
/*
* xml-data-transform
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package
org.memobase
import
org.junit.jupiter.api.TestInstance
/** Integration test container; it does not declare any test cases yet. */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestIntegration
\ No newline at end of file
src/test/kotlin/TestParams.kt
View file @
0b61b153
...
...
@@ -24,5 +24,5 @@ data class TestParams(
val
inputKey
:
String
,
val
outputKey
:
String
,
val
expectedOutputReport
:
Report
,
val
expectedOutputProcessReport
:
Report
val
expectedOutputProcessReport
:
Process
Report
)
src/test/kotlin/TestXsltTransform.kt
View file @
0b61b153
...
...
@@ -44,7 +44,7 @@ class TestXsltTransform {
}
@Test
fun
`test
old
memobase
xslt
tranform`
()
{
fun
`test
old
memobase
xslt
tran
s
form`
()
{
val
props
=
Properties
()
props
.
setProperty
(
"xsltFilePath"
,
"${resourcePath}/test_transform/test-transformer.xslt"
)
props
.
setProperty
(
"identifierFieldName"
,
"identifierMain"
)
...
...
src/test/resources/data/1/config/transform.xslt
0 → 100644
View file @
0b61b153
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
xmlns:xsl=
"http://www.w3.org/1999/XSL/Transform"
xmlns:fn=
"http://www.w3.org/2005/xpath-functions"
version=
"2.0"
xmlns:ns2=
"http://purl.org/dc/elements/1.1/"
xmlns:foxml=
"info:fedora/fedora-system:def/foxml#"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
xmlns:audit=
"info:fedora/fedora-system:def/audit#"
xmlns:dc=
"http://purl.org/dc/elements/1.1/"
xmlns:oai_dc=
"http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:rdf=
"http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs=
"http://www.w3.org/2000/01/rdf-schema#"
xmlns:fedora=
"info:fedora/fedora-system:def/relations-external#"
xmlns:fedora-model=
"info:fedora/fedora-system:def/model#"
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
xmlns:xslt=
"http://www.w3.org/1999/XSL/Transform"
xmlns=
"urn:ebu:metadata-schema:ebuCore_2012"
xpath-default-namespace=
"urn:ebu:metadata-schema:ebuCore_2012"
>
<xsl:output
indent=
"yes"
method=
"xml"
/>
<!--<xsl:template match="/digitalObject/datastream/datastreamVersion/xmlContent/ebuCoreMain/coreMetadata">
<xsl:copy-of select="."/>
</xsl:template>-->
<!-- Wraps each coreMetadata element in a "record" element and applies templates to its child nodes. -->
<xsl:template match="coreMetadata">
    <xsl:element name="record">
        <!--<xsl:apply-templates select="test"/>
        <xsl:apply-templates select="title, ns2:title, alternativeTitle, subject, description, format/essenceLocator, format/medium, type/*, references"/>-->
        <xsl:apply-templates select="node()"/>
    </xsl:element>
</xsl:template>
<!-- Emits an element named after the matched node, filled with the value of its child elements. -->
<xsl:template match="title | format/essenceLocator | format/duration | format/start | language | references | coverage/spatial/location">
    <xsl:element name="{local-name()}">
        <xsl:value-of select="*"/>
    </xsl:element>
</xsl:template>
<!-- The typeLabel attribute supplies the name of the emitted element; its content is the value of the child elements. -->
<xsl:template match="description[@typeLabel] | alternativeTitle[@typeLabel] | subject[@typeLabel] | relation[@typeLabel]">
    <xsl:element name="{@typeLabel}">
        <xsl:value-of select="*"/>
    </xsl:element>
</xsl:template>
<!-- Emits an element named after the matched node whose content is the value of its typeLabel and/or language attributes. -->
<xsl:template match="format/medium | format/dataFormat/captioningFormat | type/genre">
    <xsl:element name="{local-name()}">
        <xsl:value-of select="@typeLabel | @language"/>
    </xsl:element>
</xsl:template>
<!--
    Normalizes type/objectType to one fixed label chosen from its typeLabel attribute.
    The patterns are case-insensitive and tested in order, so the first match wins;
    an unmatched typeLabel yields an empty objectType element.
-->
<xsl:template match="type/objectType">
    <xsl:element name="objectType">
        <xsl:choose>
            <xsl:when test="matches(@typeLabel,'film','i')">
                <xsl:text>Film</xsl:text>
            </xsl:when>
            <xsl:when test="matches(@typeLabel,'photograph|foto','i')">
                <xsl:text>Foto</xsl:text>
            </xsl:when>
            <xsl:when test="matches(@typeLabel,'radio','i')">
                <xsl:text>Radio</xsl:text>
            </xsl:when>
            <xsl:when test="matches(@typeLabel,'television|tv','i')">
                <xsl:text>TV</xsl:text>
            </xsl:when>
            <xsl:when test="matches(@typeLabel,'ton|sound','i')">
                <xsl:text>Ton</xsl:text>
            </xsl:when>
            <xsl:when test="matches(@typeLabel,'video','i')">
                <xsl:text>Video</xsl:text>
            </xsl:when>
        </xsl:choose>
    </xsl:element>
</xsl:template>
<!-- Emits an element named "identifier" followed by the typeLabel, holding the value of the ns2:identifier child. -->
<xsl:template match="identifier">
    <xsl:element name="identifier{@typeLabel}">
        <xsl:value-of select="ns2:identifier"/>
    </xsl:element>
</xsl:template>
<!-- Flattens the technical details of a video, image or audio format into individual elements. -->
<xsl:template match="format/videoFormat | format/imageFormat | format/audioFormat">
    <!-- Captured here because the context node changes inside the loops below. -->
    <xsl:variable name="format" select="local-name()"/>
    <!-- One element per technicalAttributeString, named format name + typeLabel. -->
    <xsl:for-each select="technicalAttributeString">
        <xsl:element name="{$format}{@typeLabel}">
            <xsl:value-of select="."/>
        </xsl:element>
    </xsl:for-each>
    <!-- width and height keep their name; the unit attribute is appended after a space. -->
    <xsl:for-each select="width | height">
        <xsl:element name="{local-name()}">
            <xsl:value-of select="concat(., ' ', @unit)"/>
        </xsl:element>
    </xsl:for-each>
    <!-- The audio track configuration is represented by its typeLabel. -->
    <xsl:for-each select="audioTrackConfiguration">
        <xsl:element name="audioTrackConfiguration">
            <xsl:value-of select="@typeLabel"/>
        </xsl:element>
    </xsl:for-each>
</xsl:template>
<!--
    Maps contributor, creator and publisher entries to output elements.
    The branch is chosen from the role typeLabel first and then from whether the
    entry has organisationDetails or contactDetails.
-->
<xsl:template match="contributor | creator | publisher">
    <xsl:variable name="agentKind" select="local-name()"/>
    <!-- Non-empty role labels of this entry; an empty sequence when there is none. -->
    <xsl:variable name="roleLabel" select="role/@typeLabel[. != '']"/>
    <xsl:choose>
        <!-- Entries with the role ResponsibleInstitution produce no output. -->
        <xsl:when test="$roleLabel = 'ResponsibleInstitution'"/>
        <!-- Producers get dedicated element names and no role element. -->
        <xsl:when test="$roleLabel = 'Producer'">
            <xsl:choose>
                <xsl:when test="organisationDetails">
                    <xsl:element name="producerCorporateBodyName">
                        <xsl:value-of select="descendant::organisationName"/>
                    </xsl:element>
                </xsl:when>
                <xsl:when test="contactDetails">
                    <xsl:element name="producerPersonName">
                        <xsl:value-of select="descendant::name"/>
                    </xsl:element>
                </xsl:when>
            </xsl:choose>
        </xsl:when>
        <!-- Any other organisation: name plus role when a role label exists. -->
        <xsl:when test="organisationDetails">
            <xsl:element name="{$agentKind}CorporateBody">
                <xsl:element name="name">
                    <xsl:value-of select="descendant::organisationName"/>
                </xsl:element>
                <xsl:if test="$roleLabel">
                    <xsl:element name="role">
                        <xsl:value-of select="$roleLabel"/>
                    </xsl:element>
                </xsl:if>
            </xsl:element>
        </xsl:when>
        <!-- Any other person: name plus role when a role label exists. -->
        <xsl:when test="contactDetails">
            <xsl:element name="{$agentKind}Person">
                <xsl:element name="name">
                    <xsl:value-of select="descendant::name"/>
                </xsl:element>
                <xsl:if test="$roleLabel">
                    <xsl:element name="role">
                        <xsl:value-of select="$roleLabel"/>
                    </xsl:element>
                </xsl:if>
            </xsl:element>
        </xsl:when>
    </xsl:choose>
</xsl:template>
<!--
    Translates the access rights text into accessPhysical / accessDigital elements.
    Each keyword found in the text produces one element with that keyword as content.
-->
<xsl:template match="rights[@typeLabel='Access']/ns2:rights">
    <!--
        matches() tests for a substring, and "noonsite" contains "onsite".
        Every "noonsite" is removed before looking for "onsite", so a "noonsite"
        value no longer produces a contradicting accessPhysical "onsite" element.
    -->
    <xsl:if test="matches(replace(., 'noonsite', ''), 'onsite')">
        <xsl:element name="accessPhysical">
            <xsl:text>onsite</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'noonsite')">
        <xsl:element name="accessPhysical">
            <xsl:text>noonsite</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'public')">
        <xsl:element name="accessDigital">
            <xsl:text>public</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'private')">
        <xsl:element name="accessDigital">
            <xsl:text>private</xsl:text>
        </xsl:element>
    </xsl:if>
    <xsl:if test="matches(.,'faro')">
        <xsl:element name="accessDigital">
            <xsl:text>faro</xsl:text>
        </xsl:element>
    </xsl:if>
</xsl:template>
<!-- Copies the name of the rights holder contact into a rightsHolder element. -->
<xsl:template match="rights[@typeLabel='Holder']/rightsHolder/contactDetails/name">
    <xsl:element name="rightsHolder">
        <xsl:value-of select="."/>
    </xsl:element>
</xsl:template>
<!-- Re-emits ns2:source as a source element with the same text content. -->
<xsl:template match="ns2:source">
    <xsl:element name="source">
        <xsl:value-of select="."/>
    </xsl:element>
</xsl:template>
<!--
    Maps date and temporal elements to dateCreated, dateIssued or temporal,
    depending on which child element is present. The text content is produced
    by the named template "dates", except for a temporal with a periodName.
-->
<xsl:template match="date | temporal">
    <xsl:choose>
        <xsl:when test="created">
            <xsl:element name="dateCreated">
                <xsl:call-template name="dates"/>
            </xsl:element>
        </xsl:when>
        <xsl:when test="issued">
            <xsl:element name="dateIssued">
                <xsl:call-template name="dates"/>
            </xsl:element>
        </xsl:when>
        <xsl:when test="PeriodOfTime">
            <xsl:element name="temporal">
                <xsl:choose>
                    <!-- A named period takes precedence over the date attributes. -->
                    <xsl:when test="descendant::periodName">
                        <xsl:value-of select="descendant::periodName"/>
                    </xsl:when>
                    <xsl:otherwise>
                        <xsl:call-template name="dates"/>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:element>
        </xsl:when>
    </xsl:choose>
</xsl:template>
<!--
    Named template: renders the date attributes of each child element of the
    context node as text. Empty attributes are treated as absent. The branches
    are tested in order and only the first matching one produces output.
-->
<xsl:template name="dates">
    <xsl:for-each select="*">
        <xsl:variable name="startDate" select="@startDate[. != '']"/>
        <xsl:variable name="startYear" select="@startYear[. != '']"/>
        <xsl:variable name="startTime" select="@startTime[. != '']"/>
        <xsl:variable name="endDate" select="@endDate[. != '']"/>
        <xsl:variable name="endYear" select="@endYear[. != '']"/>
        <xsl:variable name="period" select="@period[. != '']"/>
        <xsl:choose>
            <!-- Combinations which are not present in created/issued/temporal and are not processed:
                * endYear and startDate
                * period and endDate and startYear
                * period and endYear and startDate
                * startTime in another combination but with startDate
                * period and endDate only
            -->
            <!-- Start only. -->
            <xsl:when test="$startDate and not($endDate | $endYear | $period | $startTime)">
                <xsl:value-of select="$startDate"/>
            </xsl:when>
            <xsl:when test="$startYear and not($endDate | $endYear | $period | $startTime)">
                <xsl:value-of select="$startYear"/>
            </xsl:when>
            <!-- Start and end, written as start/end. -->
            <xsl:when test="$startDate and $endDate and not($period | $startTime)">
                <xsl:value-of select="concat($startDate, '/', $endDate)"/>
            </xsl:when>
            <xsl:when test="$startYear and $endDate and not($period | $startTime)">
                <xsl:value-of select="concat($startYear, '/', $endDate)"/>
            </xsl:when>
            <xsl:when test="$startYear and $endYear and not($period | $startTime)">
                <xsl:value-of select="concat($startYear, '/', $endYear)"/>
            </xsl:when>
            <!-- Start time, appended to the start date after a 'T'. -->
            <xsl:when test="$startTime">
                <xsl:value-of select="concat($startDate, 'T', $startTime)"/>
            </xsl:when>
            <!-- End only, written with '?' in place of the start. -->
            <xsl:when test="$endDate and not($startDate | $startYear | $period)">
                <xsl:value-of select="concat('?/', $endDate)"/>
            </xsl:when>
            <xsl:when test="$endYear and not($startDate | $startYear | $period)">
                <xsl:value-of select="concat('?/', $endYear)"/>
            </xsl:when>
            <!-- Period, alone or followed by the start and/or end values. -->
            <xsl:when test="$period and not($endDate | $endYear | $startDate | $startYear)">
                <xsl:value-of select="$period"/>
            </xsl:when>
            <xsl:when test="$period and $startDate and not($endDate | $endYear)">
                <xsl:value-of select="concat($period, ' ', $startDate)"/>
            </xsl:when>
            <xsl:when test="$period and $startYear and not($endDate | $endYear)">
                <xsl:value-of select="concat($period, ' ', $startYear)"/>
            </xsl:when>
            <xsl:when test="$period and $startDate and $endDate">
                <xsl:value-of select="concat($period, ' ', $startDate, '/', $endDate)"/>
            </xsl:when>
            <xsl:when test="$period and $startYear and $endYear">
                <xsl:choose>
                    <!-- The period is left out when it already contains startYear-endYear. -->
                    <xsl:when test="matches(@period,concat(@startYear, '-', @endYear))">
                        <xsl:value-of select="concat($startYear, '/', $endYear)"/>
                    </xsl:when>
                    <xsl:otherwise>
                        <xsl:value-of select="concat($period, ' ', $startYear, '/', $endYear)"/>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:when>
        </xsl:choose>
    </xsl:for-each>
</xsl:template>
<!-- Fields dropped from the output: the empty template body produces nothing for them. -->
<xsl:template match="isMemberOf | rights[@typeLabel='Usage']"/>
</xsl:stylesheet>
\ No newline at end of file
src/test/resources/data/
2
/data.xml
→
src/test/resources/data/
1
/data.xml
View file @
0b61b153
File moved
src/test/resources/data/
2
/input.json
→
src/test/resources/data/
1
/input.json
View file @
0b61b153
File moved
src/test/resources/data/
2
/output.json
→
src/test/resources/data/
1
/output.json
View file @
0b61b153
File moved
src/test/resources/data/2/config/transform.xslt
deleted
100644 → 0
View file @
3bef121f
<?xml version="1.0"?>
<!--
~ xml-data-transform
~ Copyright (C) 2020 Memoriav
~
~ This program is free software: you can redistribute it and/or modify
~ it under the terms of the GNU Affero General Public License as published by
~ the Free Software Foundation, either version 3 of the License, or
~ (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU Affero General Public License for more details.
~
~ You should have received a copy of the GNU Affero General Public License
~ along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<xsl:stylesheet
version=
"1.0"
xmlns:xsl=
"http://www.w3.org/1999/XSL/Transform"
>
</xsl:stylesheet>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment