Commit f5b97f8b authored by Jonas Waeber
Browse files

Refactor GlobalTransformLoader

Improve error handling for global transform loader
Make configuration validation much more strict for date normalizer.
parent bdf2a927
......@@ -16,6 +16,8 @@
package ch.memobase.configs
import ch.memobase.exceptions.InvalidMappingException
import ch.memobase.helpers.GlobalTransformException
import ch.memobase.helpers.ValidationError
import ch.memobase.model.GlobalTransform
import ch.memobase.transform.ITransformer
import com.fasterxml.jackson.databind.ObjectMapper
......@@ -25,51 +27,65 @@ import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import org.apache.logging.log4j.LogManager
import java.io.File
import java.io.FileInputStream
import java.io.IOException
/**
 * Loads the global transform configuration from a YAML file and turns every
 * configured section into an [ITransformer].
 *
 * Call [parse] before [get]. Every failure is logged and rethrown as a
 * [GlobalTransformException].
 *
 * @param configuration path of the YAML file describing the global transforms.
 */
class GlobalTransformsLoader(private val configuration: String) {
    private val log = LogManager.getLogger(this::class.java)
    private val objectMapper = ObjectMapper(YAMLFactory()).registerKotlinModule()
    private val transforms = mutableListOf<ITransformer>()

    /**
     * Reads the configuration file and generates all configured transformers.
     *
     * The result replaces whatever an earlier call produced, so calling this
     * method repeatedly does not accumulate duplicate transformers.
     *
     * @throws GlobalTransformException if the file cannot be read or parsed, if a
     * transformer cannot be generated, or if no transformer is configured at all.
     */
    fun parse() {
        // A YAML document that maps to null carries no sections, so it is reported as empty.
        val globalTransform = readConfiguration()
            ?: failure("Empty configuration: $configuration!")
        val parsed = buildTransformers(globalTransform)
        if (parsed.isEmpty()) {
            failure("Empty configuration: $configuration!")
        }
        transforms.clear()
        transforms.addAll(parsed)
    }

    /**
     * Returns the transformers produced by the last successful [parse] call.
     * The list is empty if [parse] has not completed successfully yet.
     */
    fun get(): List<ITransformer> {
        return transforms
    }

    /** Deserializes the YAML file into a [GlobalTransform]; null if the document maps to null. */
    private fun readConfiguration(): GlobalTransform? =
        try {
            // Close the stream explicitly instead of relying on the mapper's configuration.
            FileInputStream(File(configuration)).use { stream ->
                objectMapper.readValue(stream, GlobalTransform::class.java)
            }
        } catch (ex: com.fasterxml.jackson.core.JsonProcessingException) {
            // Must precede the IOException branch: Jackson's parse and mapping
            // exceptions are IOException subclasses and would otherwise be
            // misreported as a missing file.
            failure("YamlParserError: ${ex.localizedMessage}")
        } catch (ex: IOException) {
            failure("File Not Found: $configuration.")
        } catch (ex: Exception) {
            failure("${ex}: ${ex.localizedMessage}")
        }

    /** Generates one transformer per configured section, in declaration order. */
    private fun buildTransformers(globalTransform: GlobalTransform): List<ITransformer> =
        try {
            listOfNotNull<ITransformer>(
                globalTransform.normalizeDate?.generate(),
                globalTransform.normalizeCarrierType?.generate(),
                globalTransform.normalizeLanguages?.generate(),
                globalTransform.normalizeGenre?.generate()
            )
        } catch (ex: InvalidMappingException) {
            failure("Invalid Mapping: ${ex.localizedMessage}")
        } catch (ex: ValidationError) {
            failure("Validation Error: ${ex.localizedMessage}")
        } catch (ex: Exception) {
            failure("${ex}: ${ex.localizedMessage}")
        }

    /** Logs [message] at error level and aborts with a [GlobalTransformException]. */
    private fun failure(message: String): Nothing {
        log.error(message)
        throw GlobalTransformException(message)
    }
}
\ No newline at end of file
/*
Copyright 2020 Jonas Waeber
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ch.memobase.helpers
/**
 * This exception is thrown if there is an issue with the global transform loader.
 *
 * @param message description of the failure.
 * @param cause optional underlying exception, kept so the original stack trace is not lost.
 */
class GlobalTransformException(message: String, cause: Throwable? = null) : Exception(message, cause)
......@@ -15,9 +15,13 @@
*/
package ch.memobase.model
import ch.memobase.exceptions.InvalidMappingException
import ch.memobase.helpers.ValidationError
import ch.memobase.transform.DateNormalizationTransform
import java.io.IOException
import java.nio.file.Files
import java.nio.file.Paths
import java.util.regex.PatternSyntaxException
data class NormalizeDate(
val qualifiers: String,
......@@ -25,16 +29,83 @@ data class NormalizeDate(
val singeDateMatchers: String,
val dateRangeMatchers: String
) {
/**
 * Validates the configured pattern files and builds the date normalizer from them.
 *
 * Every line of each file must declare the named groups its pattern type needs:
 * `qualifier` for qualifiers, `certainty` for certainties, `day`, `month` and `year`
 * for single dates, and `singleYear` or at least one of `fromYear`/`untilYear` for
 * date ranges. Qualifier and certainty patterns are compiled ignoring case, the
 * date patterns are compiled case-sensitively.
 *
 * @return the transform configured with the compiled patterns.
 * @throws ValidationError if a line lacks a required named group.
 */
fun generate(): DateNormalizationTransform {
    // True when the pattern text declares a named capturing group called `group`.
    fun hasGroup(pattern: String, group: String): Boolean =
        pattern.contains(Regex("\\(\\?<$group>.*\\)"))

    // Rejects a pattern line that does not declare the named group `group`.
    fun requireGroup(pattern: String, group: String, type: String, index: Int) {
        if (!hasGroup(pattern, group)) {
            throw ValidationError("[Date:$type] Regex does not contain group '$group' (Line: ${index + 1}).")
        }
    }

    val qualifierRegexList = validate(
        qualifiers, "Qualifier",
        { index: Int, value: String -> requireGroup(value, "qualifier", "Qualifier", index) },
        true
    )
    val certaintyRegexList = validate(
        certainties, "Certainty",
        { index: Int, value: String -> requireGroup(value, "certainty", "Certainty", index) },
        true
    )
    val singleDateRegexList = validate(
        singeDateMatchers, "SingleDate",
        { index: Int, value: String ->
            requireGroup(value, "day", "SingleDate", index)
            requireGroup(value, "month", "SingleDate", index)
            requireGroup(value, "year", "SingleDate", index)
        },
        false
    )
    val dateRangeRegexList = validate(
        dateRangeMatchers, "DateRange",
        { index: Int, value: String ->
            val hasRangeYear = hasGroup(value, "fromYear") || hasGroup(value, "untilYear")
            if (!hasGroup(value, "singleYear") && !hasRangeYear) {
                // The message names the groups that are actually checked here.
                throw ValidationError(
                    "[Date:DateRange] Regex does not contain group 'singleYear', 'fromYear' or 'untilYear' " +
                        "(Line: ${index + 1})."
                )
            }
        },
        false
    )
    return DateNormalizationTransform(
        qualifierValues = qualifierRegexList,
        certaintyValues = certaintyRegexList,
        singleDateMatchers = singleDateRegexList,
        dateRangeMatchers = dateRangeRegexList
    )
}
/**
 * Reads the pattern file at [path] and compiles every line into a [Regex].
 *
 * Each line is compiled first and only then passed to [check], so a syntactically
 * broken pattern is reported as an invalid pattern rather than as a missing group.
 *
 * @param path file containing one regular expression per line.
 * @param type label used in error messages, e.g. "Qualifier".
 * @param check per-line validation; receives the zero-based line index and the raw pattern text.
 * @param option when true the pattern is compiled with [RegexOption.IGNORE_CASE].
 * @return the compiled patterns in file order.
 * @throws ValidationError if a line is not a valid regular expression; anything thrown by [check] propagates.
 */
private fun validate(
    path: String,
    type: String,
    check: (Int, String) -> Unit,
    option: Boolean
): List<Regex> {
    return getLines(path).mapIndexed { index, value ->
        val compiled = try {
            if (option) Regex(value, RegexOption.IGNORE_CASE) else Regex(value)
        } catch (ex: PatternSyntaxException) {
            // The JDK message spans several lines; flatten it for single-line log output.
            val reason = ex.localizedMessage.replace("\n", " ")
            throw ValidationError("[Date:$type] Invalid Regex Pattern: $reason (Line: ${index + 1}).")
        }
        check(index, value)
        compiled
    }
}
/**
 * Reads all lines of the file at [path].
 *
 * @param path location of the file to read.
 * @return the lines of the file in order.
 * @throws InvalidMappingException if the file cannot be opened or read.
 */
private fun getLines(path: String): List<String> =
    try {
        Files.newBufferedReader(Paths.get(path)).readLines()
    } catch (ex: IOException) {
        throw InvalidMappingException("File Not Found: ${ex.localizedMessage}.")
    }
}
......@@ -15,15 +15,12 @@
*/
package ch.memobase.rdf
import java.io.StringWriter
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.Property
import org.apache.jena.rdf.model.RDFNode
import org.apache.jena.rdf.model.Resource
import org.apache.jena.rdf.model.Statement
import ch.memobase.exceptions.InvalidInputException
import org.apache.jena.rdf.model.*
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import org.apache.logging.log4j.LogManager
import java.io.StringWriter
class RicoResource(val resource: Resource) {
constructor(resource: Resource, rdfType: Resource) : this(resource.addProperty(RDF.type, rdfType))
......
......@@ -16,20 +16,61 @@
package ch.memobase
import ch.memobase.configs.GlobalTransformsLoader
import ch.memobase.helpers.GlobalTransformException
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS
import org.junit.jupiter.api.assertThrows
/**
 * Tests for [GlobalTransformsLoader]: one valid configuration and several broken
 * ones that must be rejected with a [GlobalTransformException].
 */
@TestInstance(PER_CLASS)
class TestGlobalTransformLoader {
    private val basePath = "src/test/resources/global/tests"

    /** Asserts that parsing the configuration at [file] fails with a [GlobalTransformException]. */
    private fun assertParseFails(file: String) {
        val global = GlobalTransformsLoader(file)
        assertThrows<GlobalTransformException> {
            global.parse()
        }
    }

    @Test
    fun `test full valid global loader`() {
        val file = "src/test/resources/global/transforms.yml"
        val global = GlobalTransformsLoader(file)
        global.parse()
        val transforms = global.get()
        assertThat(transforms)
            .isNotEmpty
    }

    @Test
    fun `test 1 - invalid transform path 1`() {
        assertParseFails("/test/invalid/path/transforms.yml")
    }

    @Test
    fun `test 2 - missing date normalization file`() {
        assertParseFails("$basePath/2/transforms.yml")
    }

    @Test
    fun `test 3 - invalid regex pattern in date`() {
        assertParseFails("$basePath/3/transforms.yml")
    }

    @Test
    fun `test 4 - missing qualifier group in qualifier regex`() {
        assertParseFails("$basePath/4/transforms.yml")
    }
}
\ No newline at end of file
normalizeDate:
qualifiers: /invalid/path/qualifier.txt
certainties: ""
singeDateMatchers: ""
dateRangeMatchers: ""
\ No newline at end of file
[\s+]\(?(?<qualifier>um)\)?[^a-zA-Z]
[
(?<qualifier>ca\.)
(?<qualifier>ou avant)
\ No newline at end of file
normalizeDate:
qualifiers: src/test/resources/global/tests/3/qualifier.txt
certainties: ""
singeDateMatchers: ""
dateRangeMatchers: ""
\ No newline at end of file
ou avant
\ No newline at end of file
normalizeDate:
qualifiers: src/test/resources/global/tests/4/qualifier.txt
certainties: ""
singeDateMatchers: ""
dateRangeMatchers: ""
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment