Commit 5b8d057e authored by Jonas Waeber's avatar Jonas Waeber
Browse files

Add error handling to mapper for unexpected input.

First case is an empty person name.
parent 3fb487d2
Pipeline #21815 passed with stage
in 2 minutes and 9 seconds
......@@ -48,7 +48,7 @@ dependencies {
//compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.1.0'
implementation "org.apache.logging.log4j:log4j-api:${log4jV}"
implementation 'org.memobase:memobase-service-utilities:2.0.5'
implementation 'org.memobase:memobase-service-utilities:2.0.13'
// YAML Parser
implementation 'org.snakeyaml:snakeyaml-engine:2.1'
......
......@@ -18,6 +18,7 @@
package ch.memobase.builder
import ch.memobase.exceptions.InvalidInputException
import ch.memobase.mapping.KEYS
import ch.memobase.mapping.MapperConfiguration
import ch.memobase.mapping.fields.ConstantField
......@@ -26,6 +27,7 @@ import com.beust.klaxon.JsonArray
import java.io.StringWriter
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import org.apache.logging.log4j.LogManager
class ResourceBuilder(
private val source: Map<String, Any>,
......@@ -34,24 +36,29 @@ class ResourceBuilder(
private val recordSetId: String,
private val isPublished: Boolean
) {
private val log = LogManager.getLogger(this::class.java)
private var record: Record? = null
private var physicalObject: PhysicalObject? = null
private var digitalObject: DigitalObject? = null
private var recordId = ""
private var recordTypeValue = ""
var errorMessage = ""
val errorMessages = mutableListOf<String>()
/**
 * Reads the record identifier from the source field named by `config.uri` and stores it in `recordId`.
 *
 * A String value is used as is and an Int value is converted with `toString()`. In every other case
 * (field missing, value `null`, a JSON array, or any other type) `recordId` is not assigned and a
 * message describing the problem is appended to `errorMessages`.
 *
 * @return this builder, so that calls can be chained.
 */
fun extractRecordId(): ResourceBuilder {
    if (!source.containsKey(config.uri)) {
        errorMessages.add("No id for record found in field '${config.uri}'.")
        return this
    }
    when (val id = source[config.uri]) {
        is String -> recordId = id
        is Int -> recordId = id.toString()
        // This branch used to evaluate the message string and throw it away, so a field holding
        // several identifiers was never reported. The message is now collected like the others.
        is JsonArray<*> -> errorMessages.add(
            "Found multiple values in the field '${config.uri}' for identifiers: ${id.joinToString(", ")}."
        )
        null -> errorMessages.add("The value for id is 'null' in field '${config.uri}'.")
        else -> errorMessages.add("Invalid value '$id' for id in field '${config.uri}'.")
    }
    return this
}
......@@ -64,7 +71,7 @@ class ResourceBuilder(
recordTypeValue = when (val recordType = config.recordType) {
is MappedAnnotationField -> source[recordType.field].let {
if (it !is String) {
errorMessage = "No type for record in field '${recordType.field}' for source $source."
errorMessages.add("No type for record in field '${recordType.field}' for source $source.")
""
} else {
it
......@@ -78,7 +85,7 @@ class ResourceBuilder(
fun hasRecordType(): Boolean {
return if (recordTypeValue.isEmpty()) false
else if (!KEYS.validRecordTypeValues.contains(recordTypeValue)) {
errorMessage = "Record type $recordTypeValue is invalid. Must be one of ${KEYS.validRecordTypeValues}."
errorMessages.add("Record type $recordTypeValue is invalid. Must be one of ${KEYS.validRecordTypeValues}.")
false
} else {
true
......@@ -88,40 +95,52 @@ class ResourceBuilder(
/**
 * Creates the [Record] for this builder and applies every mapper in `config.recordFieldMappers` to it.
 *
 * A mapper that throws [InvalidInputException] does not abort the build: the exception message,
 * followed by the mapper class, is appended to `errorMessages` and the remaining mappers still run.
 *
 * @return this builder, so that calls can be chained.
 */
fun generateRecord(): ResourceBuilder {
    // Bind the new record to a non-null local so the loop does not need `!!` on the nullable property.
    val newRecord = Record(recordId, recordTypeValue, recordSetId, institutionId, config.sponsoredByMemoriav, isPublished)
    record = newRecord
    for (recordFieldMapper in config.recordFieldMappers) {
        // Each mapper is applied exactly once, and only inside the guard.
        try {
            recordFieldMapper.apply(source, newRecord)
        } catch (ex: InvalidInputException) {
            errorMessages.add(ex.localizedMessage + " Could not apply ${recordFieldMapper::class.java} for record.")
        }
    }
    return this
}
/**
 * Builds the physical instantiation and links it with the record.
 *
 * When `config.physicalFieldMappers` is empty, `physicalObject` is set to `null` and nothing else
 * happens. Otherwise a [PhysicalObject] is created and every physical mapper is applied to it. A mapper
 * that throws [InvalidInputException] is recorded in `errorMessages` and the remaining mappers still run.
 *
 * The record must already exist: `record!!` throws a NullPointerException when it is still `null`.
 *
 * @return this builder, so that calls can be chained.
 */
fun generatePhysicalObject(): ResourceBuilder {
    physicalObject =
        if (config.physicalFieldMappers.isNotEmpty()) {
            val physical = PhysicalObject(recordId, recordSetId, institutionId, 1)
            config.physicalFieldMappers.forEach { mapper ->
                try {
                    mapper.apply(source, physical)
                } catch (ex: InvalidInputException) {
                    errorMessages.add(ex.localizedMessage + " Could not apply ${mapper::class.java} for physical instantiation.")
                }
            }
            // Link both directions: record -> instantiation, then instantiation -> record.
            record?.addInstantiation(physical)
            physical.addRecord(record!!)
            physical
        } else {
            null
        }
    return this
}
/**
 * Builds the digital instantiation and links it with the record.
 *
 * When `config.digitalFieldMappers` is empty, `digitalObject` is set to `null` and nothing else
 * happens. Otherwise a [DigitalObject] is created and every digital mapper is applied to it. A mapper
 * that throws [InvalidInputException] is recorded in `errorMessages` and the remaining mappers still run.
 *
 * The record must already exist: `record!!` throws a NullPointerException when it is still `null`.
 *
 * @return this builder, so that calls can be chained.
 */
fun generateDigitalObject(): ResourceBuilder {
    digitalObject =
        if (config.digitalFieldMappers.isNotEmpty()) {
            val digital = DigitalObject(recordId, recordSetId, institutionId, 1)
            config.digitalFieldMappers.forEach { mapper ->
                try {
                    mapper.apply(source, digital)
                } catch (ex: InvalidInputException) {
                    errorMessages.add(ex.localizedMessage + " Could not apply ${mapper::class.java} for digital instantiation.")
                }
            }
            // Link both directions: instantiation -> record, then record -> instantiation.
            digital.addRecord(record!!)
            record?.addInstantiation(digital)
            digital
        } else {
            null
        }
    return this
}
......@@ -137,7 +156,7 @@ class ResourceBuilder(
private fun hasDigitalObject(): Boolean = digitalObject != null
fun writeRecord(format: RDFFormat): Pair<String, String> {
fun writeRecord(format: RDFFormat): Triple<String, String, List<String>> {
return StringWriter().use { writer ->
RDFDataMgr.write(writer, record!!.model, format)
if (hasPhysicalObject()) {
......@@ -146,7 +165,7 @@ class ResourceBuilder(
if (hasDigitalObject()) {
RDFDataMgr.write(writer, digitalObject!!.model, format)
}
Pair(record!!.resource.uri, writer.toString().trim())
Triple(record!!.resource.uri, writer.toString().trim(), errorMessages)
}
}
}
......@@ -18,6 +18,7 @@
package ch.memobase.mapping.mappers
import ch.memobase.exceptions.InvalidInputException
import ch.memobase.mapping.fields.ConfigField
import ch.memobase.mapping.fields.ConstantField
import ch.memobase.mapping.fields.FieldParsers
......@@ -56,10 +57,19 @@ abstract class TypeFieldMapper : AbstractFieldMapper() {
sourceElement.value.forEachIndexed { index, s ->
if (s.isNotEmpty()) {
if (properties.size >= index + 1) {
log.debug("Adding ${field.key} to properties list at existing index $index.")
properties[index].add(Pair(field.key, field.toLiteral(s)))
} else {
properties.add(index, mutableListOf(Pair(field.key, field.toLiteral(s))))
log.debug("Adding ${field.key} to properties list at new index $index.")
log.debug(properties)
try {
properties.add(index, mutableListOf(Pair(field.key, field.toLiteral(s))))
} catch (ex: IndexOutOfBoundsException) {
throw InvalidInputException("Encountered an unexpected empty value for mapping ${field.key} -> ${field.field}.")
}
}
} else {
log.debug("Empty value for index $index.")
}
}
Empty -> log.debug("No valid value found for field ${field.key}.")
......
......@@ -24,6 +24,7 @@ import com.beust.klaxon.Klaxon
import java.io.File
import java.io.FileOutputStream
import org.apache.jena.riot.RDFFormat
import org.apache.logging.log4j.LogManager
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
......@@ -31,11 +32,11 @@ import org.junit.jupiter.api.TestInstance
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@Disabled
class LocalTestRun {
private val log = LogManager.getLogger(this::class.java)
private val headerMetadata = HeaderMetadata(
"bab-001",
"swi-001",
"1",
"bab",
"swi",
false,
"record",
"identifierMain",
......@@ -45,9 +46,9 @@ class LocalTestRun {
@Test
@Disabled
fun `test local folder`() {
val folder = "/home/jonas/memobase/data/bab-001"
val inputFolder = "/home/jonas/memobase/data/test/step-2"
val outputFolder = "/home/jonas/memobase/data/test/step-3"
val folder = "/home/jonas/memobase/data/swi-001"
val inputFolder = "/home/jonas/memobase/data/test-swi/step-2"
val outputFolder = "/home/jonas/memobase/data/test-swi/step-3"
val mappingFile = "/config/mapping.yml"
val klaxon = Klaxon()
val configurationParser = MappingConfigurationParser(File(folder + mappingFile).readBytes())
......@@ -61,6 +62,7 @@ class LocalTestRun {
Pair(it.first, klaxon.parse<Map<String, Any>>(it.second).orEmpty())
}
.map {
log.debug(it.first)
val builder = ResourceBuilder(
it.second,
configuration,
......@@ -82,6 +84,9 @@ class LocalTestRun {
writer.write(result.second)
}
}
result.third.forEach {
log.error(it)
}
}
}
}
......
......@@ -31,6 +31,7 @@ import org.apache.jena.rdf.model.impl.SelectorImpl
import org.apache.jena.riot.Lang
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import org.apache.logging.log4j.LogManager
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
......@@ -38,6 +39,7 @@ import org.junit.jupiter.api.assertAll
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestAgentMapper {
private val log = LogManager.getLogger(this::class.java)
private val klaxon = Klaxon()
......@@ -203,9 +205,9 @@ class TestAgentMapper {
}
@Test
fun `test multiple contributor names`() {
val source = klaxon.parse<Map<String, Any>>(readInputFile("input-multiple-contributors")).orEmpty()
val mapping = MappingConfigurationParser(readMapping("mapping-multiple-contributors"))
fun `test multiple contributor missing name`() {
val source = klaxon.parse<Map<String, Any>>(readInputFile("input-1")).orEmpty()
val mapping = MappingConfigurationParser(readMapping("mapping-1"))
val configuration = mapping.get()
val result = ResourceBuilder(
source,
......@@ -223,14 +225,22 @@ class TestAgentMapper {
val ntriples = result
.writeRecord(RDFFormat.NTRIPLES_UTF8)
val turtle = result.writeRecord(RDFFormat.TURTLE_PRETTY)
assertThat(sort(ntriples.second.split("\n")))
.isEqualTo(readOutputFile("output-multiple-contributors"))
FileOutputStream(File("$resourcePath/output-multiple-contributors.ttl")).use {
it.bufferedWriter().use { writer ->
writer.write(turtle.second)
}
}
ntriples.third.forEach {
log.error(it)
}
assertAll(
"",
{
assertThat(sort(ntriples.second.split("\n")))
.isEqualTo(readOutputFile("output-1"))
},
{ assertThat(ntriples.third[0]).isEqualTo("Encountered an unexpected empty value for mapping name -> contributorPerson.name. Could not apply class ch.memobase.mapping.mappers.AgentFieldMapper for record.") }
)
}
}
{
"id": "identifier",
"contributorPerson": [
{
"name": "Person 1",
"role": "Relation 1"
},
{
"role": "Relation 2"
},
{
"name": "Person 3",
"role": "Relation 3"
}
]
}
\ No newline at end of file
{
"id": "identifier",
"contributorPerson": [
{
"name": "Simon Epiney / Conseiller national PDC Valais",
"role": "Gesprächspartner"
},
{
"name": "Markus Ruf / Conseiller national Démocrates suisses Berne",
"role": "Gesprächspartner"
},
{
"name": "Jean Cavadini / Conseiller aux Etats PL Neuchâtel",
"role": "Gesprächspartner"
},
{
"name": "Irène Gardiol / Conseillère nationale écologiste Vaud",
"role": "Gesprächspartner"
},
{
"name": "Flavio Maspoli / Conseiller national Lega Tessin",
"role": "Gesprächspartner"
},
{
"name": "Flavio Cotti / Conseiller fédéral DFAE",
"role": "Gesprächspartner"
},
{
"name": "Suzette Sandoz / Conseillère nationale PL Vaud",
"role": "Gesprächspartner"
},
{
"name": "Matthias Krafft / Directeur Direction droit international public DFAE",
"role": "Gesprächspartner"
},
{
"name": "Vital Darbellay / Conseiller national PDC Valais",
"role": "Gesprächspartner"
},
{
"name": "Jacques Martin / Conseiller aux Etats PRD Vaud",
"role": "Gesprächspartner"
},
{
"name": "Pierre-André Tschanz / Journaliste RSI"
},
{
"name": "Pierre-André Tschanz / Journaliste RSI",
"role": "Interviewer"
},
{
"name": "Catherine Miskiewicz / Journaliste RSI"
},
{
"name": "Catherine Miskiewicz",
"role": "ModeratorIn"
},
{
"name": "Pierre-André Tschanz",
"role": "ModeratorIn"
},
{
"name": "Pierre-André Tschanz",
"role": "Autor"
},
{
"name": "Catherine Miskiewicz",
"role": "Autor"
}
]
}
\ No newline at end of file
<https://memobase.ch/record/test-001-identifier> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Record> .
<https://memobase.ch/record/test-001-identifier> <https://memobase.ch/internal/isPublished> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
<https://memobase.ch/record/test-001-identifier> <https://www.ica.org/standards/RiC/ontology#heldBy> <https://memobase.ch/institution/test> .
<https://memobase.ch/record/test-001-identifier> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B .
<https://memobase.ch/record/test-001-identifier> <https://www.ica.org/standards/RiC/ontology#isPartOf> <https://memobase.ch/recordSet/test-001> .
<https://memobase.ch/record/test-001-identifier> <https://www.ica.org/standards/RiC/ontology#type> "Foto" .
_:B <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Identifier> .
_:B <https://www.ica.org/standards/RiC/ontology#identifier> "test-001-identifier" .
_:B <https://www.ica.org/standards/RiC/ontology#type> "main" .
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ baseline-preprocessor
~ Copyright (C) 2019 Project Swissbib <http://swissbib.org>
~
~ This program is free software: you can redistribute it and/or modify
~ it under the terms of the GNU Affero General Public License as published by
~ the Free Software Foundation, either version 3 of the License, or
~ (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU Affero General Public License for more details.
~
~ You should have received a copy of the GNU Affero General Public License
~ along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<Configuration status="info" name="skeleton-app" packages="">
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
<PatternLayout pattern="[%-5level] [%c{1}] %m%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="debug">
<AppenderRef ref="STDOUT"/>
</Root>
</Loggers>
</Configuration>
\ No newline at end of file
......@@ -5,6 +5,15 @@
@prefix ebucore: <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
_:b0 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/rs1-1> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b0 ;
rico:name "Hans Zimmer"
] ;
rico:type "creator" .
<https://memobase.ch/record/rs1-1>
a rico:Record ;
rdau:P60451 <https://memobase.ch/institution/mrv> ;
......@@ -19,12 +28,3 @@
rico:recordResourceOrInstantiationIsSourceOfCreationRelation
_:b0 ;
rico:type "Foto" .
_:b0 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/rs1-1> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b0 ;
rico:name "Hans Zimmer"
] ;
rico:type "creator" .
......@@ -20,31 +20,31 @@
_:b0 , _:b1 , _:b2 ;
rico:type "Foto" .
_:b1 a rico:CreationRelation ;
_:b2 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/rs1-1> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b1 ;
rico:name "Third Person"
_:b2 ;
rico:name "First Person"
] ;
rico:name "Relation 3" ;
rico:name "Relation 1" ;
rico:type "creator" .
_:b0 a rico:CreationRelation ;
_:b1 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/rs1-1> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b0 ;
rico:name "First Person"
_:b1 ;
rico:name "Third Person"
] ;
rico:name "Relation 1" ;
rico:name "Relation 3" ;
rico:type "creator" .
_:b2 a rico:CreationRelation ;
_:b0 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/rs1-1> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b2 ;
_:b0 ;
rico:name "Second Person"
] ;
rico:type "creator" .
......@@ -5,105 +5,6 @@
@prefix ebucore: <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
_:b0 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b0 ;
rico:name "Flavio Maspoli / Conseiller national Lega Tessin"
] ;
rico:name "Gesprächspartner" ;
rico:type "contributor" .
_:b1 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b1 ;
rico:name "Suzette Sandoz / Conseillère nationale PL Vaud"
] ;
rico:name "Gesprächspartner" ;
rico:type "contributor" .
_:b2 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b2 ;
rico:name "Flavio Cotti / Conseiller fédéral DFAE"
] ;
rico:name "Gesprächspartner" ;
rico:type "contributor" .
_:b3 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b3 ;
rico:name "Jacques Martin / Conseiller aux Etats PRD Vaud"
] ;
rico:name "Gesprächspartner" ;
rico:type "contributor" .
_:b4 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b4 ;
rico:name "Markus Ruf / Conseiller national Démocrates suisses Berne"
] ;
rico:name "Gesprächspartner" ;
rico:type "contributor" .
_:b5 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b5 ;
rico:name "Catherine Miskiewicz / Journaliste RSI"
] ;
rico:type "contributor" .
_:b6 a rico:CreationRelation ;
rico:creationRelationHasSource <https://memobase.ch/record/test-001-identifier> ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b6 ;
rico:name "Irène Gardiol / Conseillère nationale écologiste Vaud"
] ;
rico:name "Gesprächspartner" ;
rico:type "contributor" .
_:b7 a rico:CreationRelation ;