Commit 60a98660 authored by Jonas Waeber's avatar Jonas Waeber
Browse files

[WIP] First implementation of the mapper rework to accept lists of strings in JSON.

parent 2389aa73
Pipeline #21277 passed with stage
in 2 minutes and 8 seconds
......@@ -18,9 +18,12 @@
package ch.memobase.mapping.fields
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import mapping.fields.LanguagePair
import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.ResourceFactory
import org.apache.logging.log4j.LogManager
sealed class ConfigField {
......@@ -67,43 +70,92 @@ sealed class ConfigField {
}
}
sealed class ComplexAnnotationField() : AnnotationField() {
sealed class ComplexAnnotationField : AnnotationField() {
data class ListField(val key: String, val fields: List<SimpleAnnotationField>) : ComplexAnnotationField() {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Resolves every configured field against [source] and groups the resulting literals into rows.
     *
     * For a list-valued source property the n-th value is appended to the n-th row; the row is created when
     * it does not exist yet. This assumes that values which belong together sit at the same index in each
     * field. Single string values and constants join the existing row only when exactly one row exists;
     * otherwise they open a new row. As noted by the original author, this is a known problem when a constant
     * is combined with a multi-valued field.
     *
     * @param source the source record.
     * @return the rows of literals; empty when no field yielded a value.
     */
    fun toLiterals(source: Map<String, Any>): List<List<Literal>> {
        val rows = mutableListOf<MutableList<Literal>>()

        // Placement rule shared by single string values and constants.
        fun appendSingle(literal: Literal) {
            if (rows.size == 1) {
                rows[0].add(literal)
            } else {
                rows.add(mutableListOf(literal))
            }
        }

        for (field in fields) {
            when (field) {
                is MappedAnnotationField ->
                    when (val value = FieldParsers.unpackSource(field.field, source)) {
                        is SimpleString -> appendSingle(field.toLiteral(value.value))
                        is StringList ->
                            value.value.withIndex().forEach { (index, s) ->
                                // Reuse the row at this index when it exists, otherwise append a new row.
                                if (index < rows.size) {
                                    rows[index].add(field.toLiteral(s))
                                } else {
                                    rows.add(mutableListOf(field.toLiteral(s)))
                                }
                            }
                        else -> log.debug("No value found for field $field.")
                    }
                is ConstantField -> appendSingle(field.toLiteral())
            }
        }
        return rows
    }
}
data class LanguageField(val key: String, val fields: List<LanguagePair>) :
    ComplexAnnotationField() {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Resolves the sources of every [LanguagePair] against [source] and groups the resulting
     * language-tagged literals into rows.
     *
     * For a list-valued source property the n-th value is appended to the n-th row; the row is created when
     * it does not exist yet. This assumes that values which belong together sit at the same index in each
     * field. Single string values and constants join the existing row only when exactly one row exists;
     * otherwise they open a new row, which is a known problem when a constant is combined with a
     * multi-valued field.
     *
     * @param source the source record.
     * @return the rows of language-tagged literals; empty when no source yielded a value.
     */
    fun toLangLiterals(source: Map<String, Any>): List<List<Literal>> {
        val resultList = mutableListOf<MutableList<Literal>>()
        for (languagePair in fields) {
            val tag = languagePair.tag
            for (field in languagePair.sources) {
                when (field) {
                    is MappedAnnotationField ->
                        // Look up the source property the mapped field points at (field.field). The target
                        // key of this LanguageField is not a property of the source record.
                        when (val value = FieldParsers.unpackSource(field.field, source)) {
                            is SimpleString ->
                                if (resultList.size == 1) {
                                    resultList[0].add(field.toLangLiteral(value.value, tag))
                                } else {
                                    resultList.add(mutableListOf(field.toLangLiteral(value.value, tag)))
                                }
                            is StringList ->
                                value.value.forEachIndexed { index, s ->
                                    // This makes the assumption that each value within each field
                                    // is at the same index for a specific field.
                                    if (resultList.size >= index + 1) {
                                        resultList[index].add(field.toLangLiteral(s, tag))
                                    } else {
                                        resultList.add(mutableListOf(field.toLangLiteral(s, tag)))
                                    }
                                }
                            else -> log.debug("No value found for field $field.")
                        }
                    is ConstantField ->
                        // This will cause an issue if there is a constant value added for a field
                        // with multiple entries.
                        if (resultList.size == 1) {
                            resultList[0].add(field.toLangLiteral(tag))
                        } else {
                            resultList.add(mutableListOf(field.toLangLiteral(tag)))
                        }
                }
            }
        }
        return resultList
    }
}
}
......
......@@ -19,10 +19,16 @@
package ch.memobase.mapping.fields
import ch.memobase.exceptions.InvalidMappingException
import ch.memobase.mapping.fields.SourceElement.Empty
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import mapping.fields.LanguagePair
import org.apache.logging.log4j.LogManager
@Suppress("UNCHECKED_CAST")
object FieldParsers {
private val log = LogManager.getLogger(this::class.java)
private fun parsePrefixField(key: String, values: Map<String, String>): PrefixField {
return values["field"].let { field ->
if (field != null) {
......@@ -106,8 +112,8 @@ object FieldParsers {
}
}
is List<*> -> return ListField(
entry.key,
parseFieldList(entry.key, value as List<Any>)
entry.key,
parseFieldList(entry.key, value as List<Any>)
)
else -> throw InvalidMappingException("Unknown structure for field mapping: $entry")
}
......@@ -139,18 +145,85 @@ object FieldParsers {
}
}
/**
 * Unpacks the source data JSON. This is the only place in this project where the values are extracted from
 * the source. The [SourceElement] is a simple wrapper around all the possible states the source data is
 * allowed to take.
 *
 * The dot notation tells the parser that an object is present in the source data. This is why property
 * names may not contain a dot. Only the first two segments of a dotted path are used.
 *
 * Currently only Strings, Lists of Strings and Objects can be parsed.
 * Objects may only contain Strings or Lists of Strings. When the first path segment holds a list of
 * objects, the values found under the second segment are collected from every object into one list.
 *
 * @param field the property name, or `object.property` for a nested value.
 * @param source the source record.
 * @return the unpacked value, or [Empty] when it is missing or has an unsupported shape.
 */
fun unpackSource(field: String, source: Map<String, Any>): SourceElement {
    if (!field.contains('.')) {
        return unpackValue(field, source[field])
    }
    val fields = field.split('.')
    return when (val objectValue = source[fields[0]]) {
        is Map<*, *> ->
            if (objectValue.containsKey(fields[1])) {
                unpackValue(field, objectValue[fields[1]])
            } else {
                Empty
            }
        is List<*> ->
            when {
                objectValue.isEmpty() -> Empty
                objectValue.size == 1 -> {
                    val item = objectValue[0]
                    if (item is Map<*, *>) {
                        unpackValue(field, item[fields[1]])
                    } else {
                        // Report the name of the enclosing property (first path segment).
                        log.error(
                            "The value in field ${fields[0]} inside of the array is " +
                                "not an object as expected."
                        )
                        Empty
                    }
                }
                else -> {
                    val resultList = mutableListOf<String>()
                    // Items that are not objects are skipped without a message.
                    for (item in objectValue) {
                        if (item is Map<*, *>) {
                            when (val unpackedValue = unpackValue(field, item[fields[1]])) {
                                is SimpleString -> resultList.add(unpackedValue.value)
                                is StringList -> resultList.addAll(unpackedValue.value)
                                Empty -> log.debug("No or illegal value in map found.")
                            }
                        }
                    }
                    StringList(resultList)
                }
            }
        null -> Empty
        else -> {
            log.error("Could not parse object for field $field in source.")
            Empty
        }
    }
}
/**
 * Wraps a raw value taken from the source record in a [SourceElement].
 *
 * A String becomes a [SimpleString]. A non-empty list whose elements are all Strings becomes a
 * [StringList]. Everything else yields [Empty]; an error is logged unless the value is simply null.
 *
 * @param field the field name, used only in log messages.
 * @param value the raw value, may be null.
 */
private fun unpackValue(field: String, value: Any?): SourceElement {
    return when (value) {
        is String -> SimpleString(value)
        is List<*> -> {
            // Check every element, not only the first one: a mixed list would otherwise pass the
            // unchecked cast and fail later with a ClassCastException where the values are consumed.
            val strings = value.filterIsInstance<String>()
            if (strings.isNotEmpty() && strings.size == value.size) {
                StringList(strings)
            } else {
                log.error("Could not parse list elements in field $field. It is either an empty list or is not a string.")
                Empty
            }
        }
        null -> Empty
        else -> {
            log.error("Could not parse element in field $field. The value is neither a string nor null.")
            Empty
        }
    }
}
}
/*
* mapper-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package ch.memobase.mapping.fields
/**
 * Wrapper around the states a value extracted from the source data can take.
 * Instances are produced by FieldParsers.unpackSource and matched with `when` by the field mappers.
 */
sealed class SourceElement {
/** A single string value. */
data class SimpleString(val value: String) : SourceElement()
/** A list of string values. */
data class StringList(val value: List<String>) : SourceElement()
/** No usable value: the property is missing, null, or has a shape the parser does not accept. */
object Empty : SourceElement()
}
......@@ -25,9 +25,9 @@ class AgentFieldMapper : TypeFieldMapper() {
var sourceKey: String = ""
override fun apply(source: Map<String, Any>, subject: IResource) {
translateProperties(source)
if (properties.size > 0) {
val relationNameField = properties.filter { it.first == KEYS.relationName }
val otherFields = properties.filterNot { it.first == KEYS.relationName }
properties.forEach { propertyList ->
val relationNameField = propertyList.filter { it.first == KEYS.relationName }
val otherFields = propertyList.filterNot { it.first == KEYS.relationName }
if (otherFields.isNotEmpty()) {
if (relationNameField.isEmpty() && KEYS.agentPropertiesMap.containsKey(sourceKey)) {
subject.addAgent(sourceKey, agentClassType, otherFields)
......
......@@ -25,28 +25,39 @@ import ch.memobase.mapping.fields.FieldParsers
import ch.memobase.mapping.fields.LanguageField
import ch.memobase.mapping.fields.ListField
import ch.memobase.mapping.fields.MappedAnnotationField
import ch.memobase.mapping.fields.SourceElement
import ch.memobase.mapping.fields.SourceElement.Empty
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import org.apache.logging.log4j.LogManager
class CarrierTypeMapper(val field: AnnotationField) : AbstractFieldMapper() {
private val log = LogManager.getLogger(this::class.java)
override fun apply(source: Map<String, Any>, subject: IResource) {
when (field) {
is MappedAnnotationField ->
FieldParsers.unpackSource(field.field, source).let {
if (it != null) {
subject.addRicoCarrierType(listOf(field.toLiteral(it)))
FieldParsers.unpackSource(field.field, source).let { sourceElement: SourceElement ->
when (sourceElement) {
is SimpleString -> subject.addRicoCarrierType(listOf(field.toLiteral(sourceElement.value)))
is StringList ->
sourceElement.value.forEach {
subject.addRicoCarrierType(listOf(field.toLiteral(it)))
}
is Empty -> log.debug("No value for field ${field.key} in source data.")
}
}
is ConstantField ->
subject.addRicoCarrierType(listOf(field.toLiteral()))
is LanguageField -> {
val fields = field.toLangLiterals(source)
if (fields.isNotEmpty()) {
subject.addRicoCarrierType(fields)
fields.forEach {
subject.addRicoCarrierType(it)
}
}
is ListField -> {
val fields = field.toLiterals(source)
if (fields.isNotEmpty()) {
subject.addRicoCarrierType(field.toLiterals(source))
fields.forEach {
subject.addRicoCarrierType(it)
}
}
}
......
......@@ -21,12 +21,22 @@ package ch.memobase.mapping.mappers
import ch.memobase.builder.IResource
import ch.memobase.mapping.fields.DirectMapField
import ch.memobase.mapping.fields.FieldParsers
import ch.memobase.mapping.fields.SourceElement
import ch.memobase.mapping.fields.SourceElement.Empty
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import org.apache.logging.log4j.LogManager
class DateFieldMapper(val directMapField: DirectMapField) : AbstractFieldMapper() {
class DateFieldMapper(private val directMapField: DirectMapField) : AbstractFieldMapper() {
private val log = LogManager.getLogger(this::class.java)
/**
 * Unpacks the configured source field and passes each value found to [IResource.addDate] under the
 * key of the mapped field. A missing value is only logged at debug level.
 */
override fun apply(source: Map<String, Any>, subject: IResource) {
    when (val element: SourceElement = FieldParsers.unpackSource(directMapField.field, source)) {
        is SimpleString -> subject.addDate(directMapField.key, element.value)
        is StringList -> {
            for (date in element.value) {
                subject.addDate(directMapField.key, date)
            }
        }
        is Empty -> log.debug("Found no element for field ${directMapField.key}")
    }
}
......
......@@ -21,12 +21,22 @@ package ch.memobase.mapping.mappers
import ch.memobase.builder.IResource
import ch.memobase.mapping.fields.DirectMapField
import ch.memobase.mapping.fields.FieldParsers
import ch.memobase.mapping.fields.SourceElement
import ch.memobase.mapping.fields.SourceElement.Empty
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import org.apache.logging.log4j.LogManager
class DirectFieldMapper(val directMapField: DirectMapField) : AbstractFieldMapper() {
class DirectFieldMapper(private val directMapField: DirectMapField) : AbstractFieldMapper() {
private val log = LogManager.getLogger(this::class.java)
/**
 * Unpacks the configured source field, converts each value found to a literal and adds it to
 * [subject] under the key of the mapped field. A missing value is only logged at debug level.
 */
override fun apply(source: Map<String, Any>, subject: IResource) {
    when (val element: SourceElement = FieldParsers.unpackSource(directMapField.field, source)) {
        is SimpleString -> subject.addLiteral(directMapField.key, directMapField.toLiteral(element.value))
        is StringList -> {
            for (text in element.value) {
                subject.addLiteral(directMapField.key, directMapField.toLiteral(text))
            }
        }
        is Empty -> log.debug("Found no element for field ${directMapField.key}.")
    }
}
......
......@@ -25,45 +25,78 @@ import ch.memobase.mapping.fields.FieldParsers
import ch.memobase.mapping.fields.LanguageField
import ch.memobase.mapping.fields.ListField
import ch.memobase.mapping.fields.MappedAnnotationField
import ch.memobase.mapping.fields.SourceElement
import ch.memobase.mapping.fields.SourceElement.Empty
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import org.apache.jena.rdf.model.Literal
import org.apache.logging.log4j.LogManager
class ExpandedRuleFieldMapper(
    private val ricoType: String,
    private val configField: List<ConfigField>
) : AbstractFieldMapper() {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Collects the properties of all configured fields into rows and adds one rule per row to [subject].
     *
     * The n-th value of a multi-valued field (string list, language field, list field) is placed in the
     * n-th row, so values at the same index across fields end up in the same rule. Single string values
     * and constants join the existing row only when exactly one row exists; otherwise they open a new row.
     *
     * @param source the source record.
     * @param subject the resource that receives the rules.
     */
    override fun apply(source: Map<String, Any>, subject: IResource) {
        val properties = mutableListOf<MutableList<Pair<String, List<Literal>>>>()
        for (field in configField) {
            when (field) {
                is MappedAnnotationField ->
                    when (val sourceElement = FieldParsers.unpackSource(field.field, source)) {
                        is SimpleString ->
                            addToOnlyRow(properties, Pair(field.key, listOf(field.toLiteral(sourceElement.value))))
                        is StringList ->
                            sourceElement.value.forEachIndexed { index, s ->
                                addToRow(properties, index, Pair(field.key, listOf(field.toLiteral(s))))
                            }
                        Empty -> log.debug("Could not find a valid value for field ${field.key}.")
                    }
                is ConstantField ->
                    addToOnlyRow(properties, Pair(field.key, listOf(field.toLiteral())))
                is LanguageField ->
                    field.toLangLiterals(source).forEachIndexed { index, list ->
                        addToRow(properties, index, Pair(field.key, list))
                    }
                is ListField ->
                    field.toLiterals(source).forEachIndexed { index, list ->
                        addToRow(properties, index, Pair(field.key, list))
                    }
            }
        }
        for (row in properties) {
            subject.addRule(ricoType, row)
        }
    }

    /**
     * Adds [property] to the row at [index], appending a new row when that row does not exist yet.
     * Any existing row is reused. Testing `size == index + 1` instead would insert a fresh row in
     * front of the existing ones as soon as a second multi-valued field is processed.
     */
    private fun addToRow(
        rows: MutableList<MutableList<Pair<String, List<Literal>>>>,
        index: Int,
        property: Pair<String, List<Literal>>
    ) {
        if (index < rows.size) {
            rows[index].add(property)
        } else {
            rows.add(mutableListOf(property))
        }
    }

    /** Adds [property] to the only existing row, or opens a new row when there is not exactly one row. */
    private fun addToOnlyRow(
        rows: MutableList<MutableList<Pair<String, List<Literal>>>>,
        property: Pair<String, List<Literal>>
    ) {
        if (rows.size == 1) {
            rows[0].add(property)
        } else {
            rows.add(mutableListOf(property))
        }
    }
}
......@@ -20,11 +20,14 @@ package ch.memobase.mapping.mappers
import ch.memobase.builder.IResource
import ch.memobase.mapping.fields.LanguageField
import org.apache.jena.rdf.model.Literal
/**
 * Adds every language-tagged literal produced by the wrapped [LanguageField] to the subject,
 * using the key of the language field.
 */
class LanguageFieldMapper(private val languageField: LanguageField) : AbstractFieldMapper() {
    override fun apply(source: Map<String, Any>, subject: IResource) {
        // The grouping into rows is not relevant here: all literals are added under the same key, in order.
        for (literal in languageField.toLangLiterals(source).flatten()) {
            subject.addLiteral(languageField.key, literal)
        }
    }
}
......@@ -21,11 +21,13 @@ package ch.memobase.mapping.mappers
import ch.memobase.builder.IResource
import ch.memobase.mapping.fields.ListField
/**
 * Adds every literal produced by the wrapped [ListField] to the subject, using the key of the list field.
 */
class ListFieldMapper(private val listField: ListField) :
    AbstractFieldMapper() {
    override fun apply(source: Map<String, Any>, subject: IResource) {
        // The grouping into rows is not relevant here: all literals are added under the same key, in order.
        for (literal in listField.toLiterals(source).flatten()) {
            subject.addLiteral(listField.key, literal)
        }
    }
}
......@@ -23,8 +23,8 @@ import ch.memobase.builder.IResource
/**
 * Translates the source into property lists and adds one place to the subject per property list.
 */
class PlaceFieldMapper : TypeFieldMapper() {
    override fun apply(source: Map<String, Any>, subject: IResource) {
        translateProperties(source)
        for (propertyList in properties) {
            subject.addPlace(agentClassType, propertyList)
        }
    }
}
......@@ -21,12 +21,22 @@ package ch.memobase.mapping.mappers
import ch.memobase.builder.IResource
import ch.memobase.mapping.fields.FieldParsers
import ch.memobase.mapping.fields.PrefixField
import ch.memobase.mapping.fields.SourceElement
import ch.memobase.mapping.fields.SourceElement.Empty
import ch.memobase.mapping.fields.SourceElement.SimpleString
import ch.memobase.mapping.fields.SourceElement.StringList
import org.apache.logging.log4j.LogManager
class PrefixFieldMapper(val prefixField: PrefixField) : AbstractFieldMapper() {
class PrefixFieldMapper(private val prefixField: PrefixField) : AbstractFieldMapper() {
private val log = LogManager.getLogger(this::class.java)
/**
 * Unpacks the configured source field, converts each value found to a literal via the prefix field
 * and adds it to [subject] under the key of the prefix field. A missing value is only logged at debug level.
 */
override fun apply(source: Map<String, Any>, subject: IResource) {
    when (val element: SourceElement = FieldParsers.unpackSource(prefixField.field, source)) {
        is SimpleString -> subject.addLiteral(prefixField.key, prefixField.toLiteral(element.value))
        is StringList -> {
            for (text in element.value) {
                subject.addLiteral(prefixField.key, prefixField.toLiteral(text))
            }
        }
        is Empty -> log.debug("Found no element for field ${prefixField.key}.")
    }
}
......
......@@ -25,29 +25,39 @@ import ch.memobase.mapping.fields.FieldParsers
import ch.memobase.mapping.fields.LanguageField