Commit a0834270 authored by Jonas Waeber's avatar Jonas Waeber
Browse files

second commit

parent 37d83d00
Pipeline #8584 passed with stages
in 7 minutes and 8 seconds
variables:
DOCKER_TLS_CERTDIR: ""
stages: stages:
- test - test
- publish - publish
......
...@@ -21,8 +21,9 @@ targetCompatibility = 1.8 ...@@ -21,8 +21,9 @@ targetCompatibility = 1.8
repositories { repositories {
jcenter() jcenter()
maven { url 'https://gitlab.com/api/v4/projects/11507450/packages/maven' } maven {
maven { url 'https://jitpack.io' } url "https://dl.bintray.com/jonas-waeber/memobase"
}
} }
ext { ext {
...@@ -34,8 +35,7 @@ dependencies { ...@@ -34,8 +35,7 @@ dependencies {
// https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client // https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client
//compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.1.0' //compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.1.0'
implementation files("/home/jonas/Documents/work/membase/utilities/service-utilities/build/repo/org/memobase/memobase-service-utilities/1.0.0/memobase-service-utilities-1.0.0.jar") implementation 'org.memobase:memobase-service-utilities:1.2.0'
// Logging Framework // Logging Framework
implementation "org.apache.logging.log4j:log4j-api:${log4jV}" implementation "org.apache.logging.log4j:log4j-api:${log4jV}"
...@@ -43,20 +43,13 @@ dependencies { ...@@ -43,20 +43,13 @@ dependencies {
implementation "org.apache.logging.log4j:log4j-slf4j-impl:${log4jV}" implementation "org.apache.logging.log4j:log4j-slf4j-impl:${log4jV}"
// Kafka Imports // Kafka Imports
implementation group: 'org.apache.kafka', name: 'kafka-clients', version: kafkaV //implementation group: 'org.apache.kafka', name: 'kafka-clients', version: kafkaV
implementation "org.apache.kafka:kafka-streams:${kafkaV}" implementation "org.apache.kafka:kafka-streams:${kafkaV}"
implementation 'org.apache.jena:jena:3.14.0' implementation 'org.apache.jena:apache-jena:3.14.0'
// https://mvnrepository.com/artifact/org.apache.kafka/kafka-streams-test-utils
//testCompile group: 'org.apache.kafka', name: 'kafka-streams-test-utils', version: kafkaV
// SFTP Client
implementation 'com.hierynomus:sshj:0.27.0'
// YAML Parser // YAML Parser
implementation 'org.snakeyaml:snakeyaml-engine:2.1' implementation 'org.snakeyaml:snakeyaml-engine:2.1'
// CSV Reader
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:0.7.3")
// JSON Parser // JSON Parser
implementation 'com.beust:klaxon:5.2' implementation 'com.beust:klaxon:5.2'
// Compression // Compression
...@@ -67,12 +60,10 @@ dependencies { ...@@ -67,12 +60,10 @@ dependencies {
implementation "org.jetbrains.kotlin:kotlin-reflect:1.3.71" implementation "org.jetbrains.kotlin:kotlin-reflect:1.3.71"
testCompile("org.junit.jupiter:junit-jupiter:5.4.2") testCompile("org.junit.jupiter:junit-jupiter:5.4.2")
testImplementation 'org.assertj:assertj-core:3.15.0'
// https://mvnrepository.com/artifact/org.apache.kafka/kafka-streams-test-utils // https://mvnrepository.com/artifact/org.apache.kafka/kafka-streams-test-utils
testCompile group: 'org.apache.kafka', name: 'kafka-streams-test-utils', version: kafkaV testCompile group: 'org.apache.kafka', name: 'kafka-streams-test-utils', version: kafkaV
testImplementation "org.apache.kafka:kafka-clients:$kafkaV:test"
testImplementation "org.apache.kafka:kafka_2.11:$kafkaV"
testImplementation "org.apache.kafka:kafka_2.11:$kafkaV:test"
} }
compileKotlin { compileKotlin {
......
...@@ -30,7 +30,11 @@ class App { ...@@ -30,7 +30,11 @@ class App {
@JvmStatic fun main(args: Array<String>) { @JvmStatic fun main(args: Array<String>) {
try { try {
val settings = SettingsLoader( val settings = SettingsLoader(
listOf() listOf(
"institution.id",
"recordSet.id"
),
useStreamsConfig = true
) )
val topology = KafkaTopology(settings).build() val topology = KafkaTopology(settings).build()
val stream = KafkaStreams(topology, settings.kafkaStreamsSettings) val stream = KafkaStreams(topology, settings.kafkaStreamsSettings)
......
/*
* mapper-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
object DateHelpers {
private val dateRangePatterns = listOf(
Regex("-|\u2013|\u2014")
)
private val dateSetPatterns = listOf(
Regex("/")
)
private val normalizedDatesPatterns = listOf(
Regex("[1-2][089][0-9][0-9]"),
Regex("[0-3][0-9]-[01][0-9]-[1-2][089][0-9][0-9]")
)
fun isNormalizedDate(value: String): Boolean {
normalizedDatesPatterns.forEach {
val result = it.matchEntire(value)
if (result != null) {
return true
}
}
return false
}
fun isDateRange(value: String): Boolean {
dateRangePatterns.forEach {
val result = it.find(value)
if (result != null) {
return true
}
}
return false
}
fun isDateSet(value: String): Boolean {
dateSetPatterns.forEach {
val result = it.find(value)
if (result != null) {
return true
}
}
return false
}
}
\ No newline at end of file
...@@ -27,10 +27,17 @@ class KafkaTopology(private val settings: SettingsLoader ...@@ -27,10 +27,17 @@ class KafkaTopology(private val settings: SettingsLoader
) { ) {
private val log = LogManager.getLogger("KafkaTopology") private val log = LogManager.getLogger("KafkaTopology")
fun build(): Topology { fun build(): Topology {
val builder = StreamsBuilder() val builder = StreamsBuilder()
return builder.build() return builder.build()
} }
private fun writeTriples() {
}
} }
\ No newline at end of file
/*
* mapper-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import org.apache.jena.rdf.model.Model
import org.apache.logging.log4j.LogManager
import org.memobase.settings.CustomEnvConfig
import org.memobase.settings.MissingSettingException
import org.snakeyaml.engine.v2.api.Load
import org.snakeyaml.engine.v2.api.LoadSettings
import org.snakeyaml.engine.v2.exceptions.MissingEnvironmentVariableException
import java.nio.charset.Charset
import java.nio.file.Files
import java.nio.file.Path
import java.util.Optional
import kotlin.system.exitProcess
class MappingConfig(path: Path) {
val fieldMappers = mutableListOf<FieldMapper>()
private val log = LogManager.getLogger("MappingConfigParser")
init {
val result = Files.list(path).map { file -> file.toFile().readText(Charset.defaultCharset()) }
.reduce("") { s, s2 -> s + "\n" + s2 }.trim()
if (result.isNotEmpty()) {
try {
val rawYaml = loadYaml(result)
val iterable = rawYaml as Iterable<*>
for (item in iterable) {
when (item) {
is Map<*, *> ->
for (key in item.keys) {
when (key as String) {
"record" -> parseRecordConfig(item["record"] as Map<String, Any>)
"physical" -> parsePhysicalInstantiationConfig(item["physical"] as Map<String, Any>)
"digital" -> parseDigitalInstantiationConfig(item["digital"] as Map<String, Any>)
else -> log.error("Unknown key for top level entity: $key")
}
}
else -> log.error("null")
}
}
} catch (ex: ClassCastException) {
log.error(ex.message)
exitProcess(1)
}
} else {
exitProcess(1)
}
}
private fun loadYaml(data: String): Any {
val settings =
LoadSettings.builder().setAllowDuplicateKeys(true).setEnvConfig(Optional.of(CustomEnvConfig())).build()
val load = Load(settings)
return try {
load.loadAllFromString(data)
} catch (ex: MissingEnvironmentVariableException) {
throw MissingSettingException("env", ex.localizedMessage)
}
}
private fun parseRecordConfig(source: Map<String, Any>) {
for (entry in source) {
when (entry.key) {
// literal properties
"name", "descriptiveNote", "scopeAndContent",
"abstract", "source", "hasSponsoringAgentOfResource" -> {
when (val value = parseField(entry)) {
is SimpleField -> fieldMappers.add(StringFieldMapper(value))
is ConstField -> fieldMappers.add(ConstantFieldMapper(value))
is ListField -> fieldMappers.add(ListFieldMapper(value))
is LanguageField -> fieldMappers.add(LanguageTagFieldMapper(value))
}
}
"rights" ->
when (val value = entry.value) {
is Map<*, *> -> {
for (rightsEntry in value.entries) {
val configField =
parseFieldWithKeyValidationForRules(rightsEntry as Map.Entry<String, Any>)
fieldMappers.add(RuleFieldMapper(configField))
}
}
else -> throw InvalidMappingException("Expected key value map under rights label.")
}
"title" ->
when (val value = entry.value) {
is Map<*, *> -> {
for (titleEntry in value.entries) {
val configField =
parseFieldWithKeyValidationForTitles(titleEntry as Map.Entry<String, Any>)
if (configField is LanguageField) {
fieldMappers.add(TitleFieldMapper(configField))
}
else
throw InvalidMappingException("Title mapping requires language tags!")
}
}
else -> throw InvalidMappingException("Expected key value map under title label.")
}
}
}
}
private fun parseField(entry: Map.Entry<String, Any>): ConfigField {
when (val value = entry.value) {
is String -> return SimpleField(entry.key, value)
is Map<*, *> -> {
return if (value.containsKey("const")) {
ConstField(entry.key, value["const"] as String)
} else {
val mutableList = mutableListOf<Pair<String, List<String>>>()
for (k in value.keys) {
if (k is String && listOf("de", "it", "fr").contains(k)) {
when (val source = value[k]) {
is String -> mutableList.add(Pair(k, listOf(source)))
is List<*> -> mutableList.add(Pair(k, source as List<String>))
else -> throw InvalidMappingException("Could not parse content of language tag definition. Must be list or string.")
}
}
}
if (mutableList.isNotEmpty()) {
LanguageField(entry.key, mutableList)
} else {
throw InvalidMappingException("Unknown key values in field mapping: $value. Use 'const' or 'de', 'fr', 'it' language tags!")
}
}
}
is List<*> -> return ListField(entry.key, value as List<String>)
else -> throw InvalidMappingException("Unknown structure for field mapping: $entry")
}
}
private fun parsePhysicalInstantiationConfig(source: Map<String, Any>) {
}
private fun parseDigitalInstantiationConfig(source: Map<String, Any>) {
}
private fun parseFieldWithKeyValidationForRules(entry: Map.Entry<String, Any>): ConfigField {
if (listOf("rights", "access", "holder", "usage").contains(entry.key)) {
return parseField(entry)
} else {
throw InvalidMappingException("Rights section does not allow type: ${entry.key}. Use 'rights', 'access', 'holder' or 'usage'.")
}
}
private fun parseFieldWithKeyValidationForTitles(entry: Map.Entry<String, Any>): ConfigField {
if (listOf("main", "serial", "broadcast").contains(entry.key)) {
return parseField(entry)
} else {
throw InvalidMappingException("Title section does not allow type: ${entry.key}. Use 'main', 'serial', 'broadcast'.")
}
}
}
interface FieldMapper {
fun apply(source: Map<String, String>, subject: RecordResource)
}
class StringFieldMapper(private val simpleField: SimpleField) : FieldMapper {
override fun apply(source: Map<String, String>, subject: RecordResource) {
source[simpleField.field].let {
if (it != null) {
subject.addStringLiteral(simpleField.key, it)
}
}
}
}
class ConstantFieldMapper(private val constantField: ConstField) : FieldMapper {
override fun apply(source: Map<String, String>, subject: RecordResource) {
subject.addStringLiteral(constantField.key, constantField.constant)
}
}
class ListFieldMapper(private val listField: ListField): FieldMapper {
override fun apply(source: Map<String, String>, subject: RecordResource) {
for (sourceField in listField.fields) {
source[sourceField].let {
if (it != null) {
subject.addStringLiteral(listField.key, it)
}
}
}
}
}
class LanguageTagFieldMapper(
private val languageField: LanguageField
) : FieldMapper {
override fun apply(source: Map<String, String>, subject: RecordResource) {
for (pair in languageField.fields) {
for (field in pair.second) {
if (source.containsKey(field)) {
subject.addLangLiteral(languageField.key, source[field] as String, pair.first)
}
}
}
}
}
class RuleFieldMapper(private val configField: ConfigField) : FieldMapper {
override fun apply(source: Map<String, String>, subject: RecordResource) {
//TODO: Implement rules. Need to know more first!
}
}
class TitleFieldMapper(private val languageField: LanguageField): FieldMapper {
override fun apply(source: Map<String, String>, subject: RecordResource) {
for (pair in languageField.fields) {
for (field in pair.second) {
if (source.containsKey(field)) {
subject.addTitle(languageField.key, source[field] as String, pair.first)
}
}
}
}
}
open class ConfigField
data class SimpleField(val key: String, val field: String) : ConfigField()
data class ConstField(val key: String, val constant: String) : ConfigField()
data class ListField(val key: String, val fields: List<String>) : ConfigField()
data class LanguageField(val key: String, val fields: List<Pair<String, List<String>>>) : ConfigField()
class InvalidMappingException(message: String) : Exception(message)
/*
* mapper-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
object NS {
val rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
val rdfs = "http://www.w3.org/2000/01/rdf-schema#"
val owl = "http://www.w3.org/2002/07/owl#"
val skos = "http://www.w3.org/2004/02/skos/core#"
val rico = "https://www.ica.org/standards/RiC/ontology#"
val ebucore = "http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#"
val memint = "https://memobase.ch/institution/"
val memrs = "https://memobase.ch/recordSet/"
val memr = "https://memobase.ch/record/"
val mempo = "https://memobase.ch/instatiation/physical/"
val memdo = "https://memobase.ch/instantiation/digital/"
val dce = "http://purl.org/dc/elements/1.1/"
val dct = "http://purl.org/dc/terms/"
val schema = "http://schema.org/"
val foaf = "http://xmlns.com/foaf/0.1/"
val wdt = "http://www.wikidata.org/prop/direct/"
val wdtn = "http://www.wikidata.org/prop/direct-normalized/"
}
\ No newline at end of file
/*
* mapper-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.ResourceFactory
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import java.io.ByteArrayOutputStream
import java.io.StringWriter
class RecordBuilder(
private val recordFieldMappers: List<FieldMapper>,
private val source: Map<String, String>,
institutionId: String, recordSetId: String, recordId: String) {
private val resource = RecordResource(recordId)
init {
resource.rdfType()
resource.isPartOf(recordSetId)
resource.providedBy(institutionId)
for (recordFieldMapper in recordFieldMappers) {
recordFieldMapper.apply(source, resource)
}
}
fun write(): String {
val writer = StringWriter()
RDFDataMgr.write(writer, resource.model, RDFFormat.TURTLE_PRETTY)
val result = writer.toString()
writer.close()
return result
}
}
\ No newline at end of file
/*
* mapper-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.Property
import org.apache.jena.rdf.model.Resource
import org.apache.jena.rdf.model.ResourceFactory
class RecordResource(id: String) {
val model = ModelFactory.createDefaultModel()
init {
model.setNsPrefix("rico", NS.rico)
model.setNsPrefix("rdf", NS.rdf)
model.setNsPrefix("dct", NS.dct)
model.setNsPrefix("ebucore", NS.ebucore)
}
val record = model.createResource(NS.memr + id)
// Classes
private val ricoRecord = model.createResource(NS.rico + "Record")
private val ricoTitleClass = model.createResource(NS.rico + "Title")