Commit c1c26a11 authored by Günter Hipler's avatar Günter Hipler
Browse files

initial setup for project based on IIIF creator from Sebastian

parents
Pipeline #19423 passed with stages
in 6 minutes and 31 seconds
.bloop/
.idea/
.metals/
target/
project/project
project/target
# GitLab CI pipeline definition: three stages, with the job definitions pulled
# in from the shared ci-templates project referenced under `include` below.
stages:
- test
- build
- publish
variables:
# NOTE(review): an empty DOCKER_TLS_CERTDIR presumably disables TLS for the
# docker-in-docker service used by the image template - confirm against the template.
DOCKER_TLS_CERTDIR: ""
include:
# sbt build template (Scala 2.13 variant, judging by the file name).
- project: 'memoriav/memobase-2020/utilities/ci-templates'
file: 'sbt-build/sbt-build-2.13.yml'
# Docker image template.
- project: 'memoriav/memobase-2020/utilities/ci-templates'
file: 'docker-image/docker-image.yml'
\ No newline at end of file
version = "2.6.4"
# Runtime image for the service: a JRE plus the fat jar produced by sbt-assembly.
FROM amd64/adoptopenjdk:11-jre
# COPY is sufficient for a plain local file; ADD's archive and URL handling is not wanted here.
COPY target/scala-2.13/app.jar /app/app.jar
# Exec form: the JVM is started directly instead of through `/bin/sh -c`, so it
# receives SIGTERM itself and the application's shutdown hook can run.
CMD ["java", "-jar", "/app/app.jar"]
# Creation of Europeana Data Model (EDM) documents
Transforms Memobase RiC-O information into the EDM model.
(This work is strongly inspired by earlier work by my colleagues Sebastian and Jonas – thanks!
- iiif-manifest-creator
- normalization-service
- mapper-service
- media-linker
- fedora-ingest-service
- drupal-sync-service
)
import Dependencies._
// Build-wide settings shared by every project of this build.
ThisBuild / scalaVersion := "2.13.3"
ThisBuild / version := "0.1.0-SNAPSHOT"
ThisBuild / organization := "ch.memobase"
ThisBuild / organizationName := "Memoriav"
// Only git tags that start with digits followed by a dot (e.g. "1.2.0") are
// accepted as version numbers; every other tag is ignored.
ThisBuild / git.gitTagToVersionNumber := { tag: String =>
if (tag matches "[0-9]+\\..*") Some(tag)
else None
}
// Single root project; sbt-assembly packages it as a fat jar.
lazy val root = (project in file("."))
.settings(
name := "rico2edm",
// Fixed jar name: the Dockerfile and the Kubernetes manifest both refer to app.jar.
assemblyJarName in assembly := "app.jar",
// Tests are not executed as part of `assembly`.
test in assembly := {},
mainClass in assembly := Some("ch.memobase.App"),
// Resolve duplicate files while merging dependency jars: keep the first
// logging configuration found, drop Java module descriptors, and fall back
// to the plugin's default strategy for everything else.
assemblyMergeStrategy in assembly := {
case "log4j.properties" => MergeStrategy.first
case "module-info.class" => MergeStrategy.discard
case "log4j2.xml" => MergeStrategy.first
case x =>
val oldStrategy = (assemblyMergeStrategy in assembly).value
oldStrategy(x)
},
// Additional repository for the Memobase utility libraries.
resolvers ++= Seq(
"Memobase Utils" at "https://dl.bintray.com/memoriav/memobase"
),
// Module definitions live in project/Dependencies.scala.
libraryDependencies ++= Seq(
iiifApis,
// iiifPresentationApi,
kafkaStreams,
log4jApi,
log4jCore,
log4jScala,
log4jSlf4j,
memobaseServiceUtils,
upickle,
kafkaStreamsTestUtils % Test,
scalatic % Test,
scalaTest % Test
)
)
# Deployment of the RiC-O to EDM transformer (a Kafka Streams service).
# NOTE(review): the resource names keep the existing "transfromer" spelling on
# purpose; renaming them would create a new Deployment instead of updating this one.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pp-rico2edmtransfromer
  namespace: memobase
  labels:
    app: pp-rico-2-edm-transfromer-app
spec:
  selector:
    matchLabels:
      app: pp-rico-2-edm-transfromer-app
  replicas: 1
  template:
    metadata:
      labels:
        app: pp-rico-2-edm-transfromer-app
        tier: web
    spec:
      containers:
        - name: pp-rico2edmtransfromer-container
          image: cr.gitlab.switch.ch/memoriav/memobase-2020/services/postprocessing/rico-edm-transformer:latest
          imagePullPolicy: Always
          command: [ "java" ]
          # Initial heap 128m, maximum heap 256m. The previous value passed -Xmx
          # twice ("-Xmx128m", "-Xmx256m"), so the first flag was silently overridden.
          # NOTE(review): a 256m maximum heap equals the container memory limit below;
          # non-heap JVM memory comes on top, so consider raising the limit or lowering -Xmx.
          args: [ "-Xms128m", "-Xmx256m", "-jar", "/app/app.jar" ]
          resources:
            requests:
              cpu: "0.2"
              memory: "128Mi"
            limits:
              cpu: "0.4"
              memory: "256Mi"
          env:
            # NOTE(review): this value looks copied from the IIIF manifest creator - confirm.
            - name: JOB_ID
              value: iiif-manifest-creator
            - name: KAFKA_BOOTSTRAP_SERVERS
              value: mb-ka1.memobase.unibas.ch:9092,mb-ka2.memobase.unibas.ch:9092,mb-ka3.memobase.unibas.ch:9092
            - name: APPLICATION_ID
              value: rico-2-edm-transfromer
            - name: TOPIC_IN
              value: fedora-output-json-records
            - name: TOPIC_OUT
              value: edm-documents
            - name: TOPIC_PROCESS
              value: postprocessing-reporting
      restartPolicy: Always
{"name":"sbt","version":"1.4.4","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/local/jdk-11.0.7+10/bin/java","-Xms100m","-Xmx100m","-classpath","/home/swissbib/env.local/tools/java.tools/sbt.1.4.4/bin/sbt-launch.jar","xsbt.boot.Boot","-bsp"]}
\ No newline at end of file
/*
* IIIF Manifest Creator
* Copyright (C) 2020 Memobase
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import sbt._
/** Central list of library versions and module definitions used by build.sbt. */
object Dependencies {
  lazy val kafkaV = "2.7.0"
  // Log4j 2 releases before 2.17.1 are affected by the Log4Shell family of
  // vulnerabilities (CVE-2021-44228 and follow-ups); 2.11.2 must not be shipped.
  lazy val log4jV = "2.17.1"
  lazy val scalatestV = "3.1.2"

  lazy val iiifApis = "de.digitalcollections.iiif" % "iiif-apis" % "0.3.8"
  lazy val iiifPresentationApi =
    "ch.memobase.iiif" % "iiif-presentation-api-v3" % "0.0.1"
  lazy val kafkaStreams = "org.apache.kafka" %% "kafka-streams-scala" % kafkaV
  lazy val kafkaStreamsTestUtils =
    "org.apache.kafka" % "kafka-streams-test-utils" % kafkaV
  lazy val log4jApi = "org.apache.logging.log4j" % "log4j-api" % log4jV
  lazy val log4jCore = "org.apache.logging.log4j" % "log4j-core" % log4jV
  lazy val log4jScala = "org.apache.logging.log4j" %% "log4j-api-scala" % "12.0"
  lazy val log4jSlf4j = "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4jV
  lazy val memobaseServiceUtils =
    "org.memobase" % "memobase-service-utilities" % "1.4.1"
  // The identifier keeps its existing spelling because build.sbt refers to it by this name.
  lazy val scalatic = "org.scalactic" %% "scalactic" % scalatestV
  lazy val scalaTest = "org.scalatest" %% "scalatest" % scalatestV
  lazy val upickle = "com.lihaoyi" %% "upickle" % "0.9.5"
}
#
# IIIF Manifest Creator
# Copyright (C) 2020 Memobase
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
sbt.version=1.3.13
// DO NOT EDIT! This file is auto-generated.
// This file enables sbt-bloop to create bloop config files.
addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.4.4-13-408f4d80")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")
addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0")
<!--
~ Fedora Event Handler
~ Copyright (C) 2020 Memobase
~
~ This program is free software: you can redistribute it and/or modify
~ it under the terms of the GNU Affero General Public License as
~ published by the Free Software Foundation, either version 3 of the
~ License, or (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU Affero General Public License for more details.
~
~ You should have received a copy of the GNU Affero General Public License
~ along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<scalastyle>
<name>Scalastyle standard configuration</name>
<check level="warning" class="org.scalastyle.file.FileTabChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.file.FileLengthChecker" enabled="true">
<parameters>
<parameter name="maxFileLength"><![CDATA[800]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
<parameters>
<parameter name="header"><![CDATA[/*
* IIIF Manifest Creator
* Copyright (C) 2020 Memobase
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
<parameters>
<parameter name="maxLineLength"><![CDATA[160]]></parameter>
<parameter name="tabSize"><![CDATA[4]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true">
<parameters>
<parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
<parameters>
<parameter name="maxParameters"><![CDATA[8]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
<parameters>
<parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.NullChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[println]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="true">
<parameters>
<parameter name="maxTypes"><![CDATA[30]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="true">
<parameters>
<parameter name="maximum"><![CDATA[10]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
<parameters>
<parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
<parameter name="doubleLineAllowed"><![CDATA[false]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="true">
<parameters>
<parameter name="maxLength"><![CDATA[50]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true">
<parameters>
<parameter name="maxMethods"><![CDATA[30]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"/>
<check level="warning" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"/>
</scalastyle>
\ No newline at end of file
# Service settings, loaded by name ("app.yml") in App.scala via SettingsLoader.
# Each placeholder names an environment variable that the Kubernetes deployment sets.
# NOTE(review): the exact meaning of the ":?system" suffix is defined by the
# settings loader library - confirm there before changing it.
kafka:
streams:
bootstrap.servers: ${KAFKA_BOOTSTRAP_SERVERS:?system}
application.id: ${APPLICATION_ID:?system}
topic:
in: ${TOPIC_IN:?system}
out: ${TOPIC_OUT:?system}
process: ${TOPIC_PROCESS:?system}
<!--
~ Fedora Event Handler
~ Copyright (C) 2020 Memobase
~
~ This program is free software: you can redistribute it and/or modify
~ it under the terms of the GNU Affero General Public License as
~ published by the Free Software Foundation, either version 3 of the
~ License, or (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU Affero General Public License for more details.
~
~ You should have received a copy of the GNU Affero General Public License
~ along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<!-- Console-only logging at INFO level. The configuration name now matches this
     project (see `name` in build.sbt) instead of the service it was copied from. -->
<Configuration status="info" name="rico2edm" packages="">
    <Appenders>
        <Console name="STDOUT" target="SYSTEM_OUT">
            <PatternLayout pattern="[%-5level] [%c{1}] %m%n"/>
        </Console>
    </Appenders>
    <Loggers>
        <Root level="info">
            <AppenderRef ref="STDOUT"/>
        </Root>
    </Loggers>
</Configuration>
/*
* IIIF Manifest Creator
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package ch.memobase
import java.time.Duration
import org.apache.kafka.streams.KafkaStreams
import org.apache.logging.log4j.scala.Logging
import org.memobase.settings.SettingsLoader
import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}
/** Entry point of the service.
  *
  * Loads the settings from `app.yml`, builds the Kafka Streams topology for the
  * configured input, output and reporting topics, and starts stream processing.
  */
object App extends scala.App with Logging {
  val topology = new KafkaTopology
  // NOTE(review): the meaning of the four boolean flags is defined by
  // SettingsLoader and not visible here - check its constructor before changing them.
  private val settings = new SettingsLoader(
    List[String]().asJava,
    "app.yml",
    false,
    true,
    false,
    false
  )
  val streams = new KafkaStreams(
    topology.build(
      settings.getInputTopic,
      settings.getOutputTopic,
      settings.getProcessReportTopic
    ),
    settings.getKafkaStreamsSettings
  )
  val shutdownGracePeriodMs = 10000

  // Registered before start() so the streams instance is closed on every JVM
  // exit path, including the sys.exit(1) in the failure branch below.
  sys.ShutdownHookThread {
    streams.close(Duration.ofMillis(shutdownGracePeriodMs))
  }

  logger.trace("Starting stream processing")
  Try(
    streams.start()
  ) match {
    case Success(_) =>
      // start() only launches the stream threads and returns; processing
      // continues in the background, so nothing has "finished" at this point.
      logger.info("Stream processing started.")
    case Failure(f) =>
      // Pass the throwable along so the stack trace is not lost.
      logger.error(s"Aborting due to errors: ${f.getMessage}", f)
      sys.exit(1)
  }
}
/*
* IIIF Manifest Creator
* Copyright (C) 2020 Memobase
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package ch.memobase
import ujson.Value.{Value => JValue}
import ujson.{Arr => JArr, Obj => JObj, Str => JStr}
import scala.collection.mutable
import scala.util.Try
object Extractors {
/** Parses a JSON document and returns the content of its top-level `@graph` entry.
  *
  * Yields a `Failure` when the text is not valid JSON, the root is not an object,
  * the `@graph` key is absent, or its value is not an array.
  */
val jsonGraph: String => Try[JValue] = (rawJson: String) =>
  Try {
    val document = ujson.read(rawJson)
    document.obj("@graph").arr
  }
/** Finds the first node of the graph whose `type` or `@type` entry is the string
  * `resourceType` and returns its fields.
  *
  * Nodes that are not JSON objects, or whose type entry is not a plain string,
  * are skipped instead of aborting the whole lookup. The result is a `Failure`
  * carrying a `NoSuchElementException` when no node matches.
  */
private val fedoraResource =
  (jsonGraph: JArr) =>
    (resourceType: String) =>
      Try {
        jsonGraph.value
          .collectFirst {
            case res
                if res.objOpt.exists(fields =>
                  List("type", "@type").exists(key =>
                    fields.get(key).flatMap(_.strOpt).contains(resourceType)
                  )
                ) =>
              res.obj.value
          }
          .getOrElse(
            throw new NoSuchElementException(
              s"No resource of type '$resourceType' found in graph"
            )
          )
      }
/** Resolves a reference held by `resource`: returns the fields of the first
  * graph node whose `@id` equals the value stored under `property`.
  *
  * Returns `None` when `resource` has no such property or no node matches.
  * Graph entries that are not objects or carry no `@id` are skipped, so one
  * unexpected node can no longer make the lookup throw.
  */
private val getBlankNodeContent = (graph: JArr) =>
  (resource: mutable.LinkedHashMap[String, JValue]) =>
    (property: String) =>
      resource.get(property).flatMap { target =>
        graph.value.collectFirst {
          case res if res.objOpt.flatMap(_.get("@id")).contains(target) =>
            res.obj.value
        }
      }
/** Resolves a possibly multi-valued reference held by `resource`: returns the
  * fields of every graph node whose `@id` is referenced under `property`.
  *
  * The property value may be a single id or an array of ids; an array-valued
  * property previously could never match any node. Graph entries that are not
  * objects or carry no `@id` are skipped instead of raising an exception. The
  * result is empty when the property is absent or nothing matches.
  */
private val getBlankNodesContent = (graph: JArr) =>
  (resource: mutable.LinkedHashMap[String, JValue]) =>
    (property: String) => {
      // Normalise the property value to the list of ids it references.
      val targets: List[JValue] = resource.get(property) match {
        case Some(ids: JArr) => ids.value.toList
        case Some(id)        => List(id)
        case None            => Nil
      }
      graph.value
        .filter(res =>
          res.objOpt.flatMap(_.get("@id")).exists(id => targets.contains(id))
        )
        .map(_.obj.value)
    }
/** Looks up the first graph node typed `digitalObject`. */
val digitalObject: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
  graph => fedoraResource(graph)("digitalObject")
/** Looks up the first graph node typed as a RiC-O `Record`. */
val record: JArr => Try[mutable.LinkedHashMap[String, JValue]] = { graph =>
  val recordType = "https://www.ica.org/standards/RiC/ontology#Record"
  fedoraResource(graph)(recordType)
}
/*val recordSet: JArr => Try[mutable.LinkedHashMap[String, JValue]] =
jsonGraph => fedoraResource(jsonGraph)("recordSet")*/
/** Reads the entry stored under a key as text; `None` when the key is absent
  * or the entry is not a JSON string.
  */
private val stringValue = (fields: JObj) =>
  (key: String) =>
    for {
      entry <- fields.value.get(key)
      text <- entry.strOpt
    } yield text
/** Reads the string entry stored under a key and parses it as a `Double`.
  *
  * Returns `None` when the key is absent, the entry is not a JSON string, or
  * the text is not a valid number (this case used to throw a
  * `NumberFormatException`).
  */
private val numValue = (jsonObj: JObj) =>
  (valueKey: String) =>
    stringValue(jsonObj)(valueKey).flatMap(_.toDoubleOption)
/** Reads the entry stored under a key as a list of strings; `None` when the
  * key is absent. An entry that is not an array of strings makes the ujson
  * accessors throw.
  */
private val arrayValues = (fields: JObj) =>
  (key: String) =>
    fields.value.get(key).map(entry => entry.arr.toList.map(_.str))
/** The `type` entry of a node, if present as a string. */
val ricoType: JObj => Option[String] =
  (node: JObj) => stringValue(node)("type")
/** The `title` entry of a node, if present as a string. */
val title: JObj => Option[String] =
  (node: JObj) => stringValue(node)("title")
/** The `descriptiveNote` entry of a node, if present as a string. */
val descriptiveNote: JObj => Option[String] =
  (node: JObj) => stringValue(node)("descriptiveNote")
/** The `scopeAndContent` entry of a node, if present as a string. */
val scopeAndContent: JObj => Option[String] =
  (node: JObj) => stringValue(node)("scopeAndContent")
/** Name of the node that the resource references through `publishedBy`.
  *
  * `None` when the reference cannot be resolved in the graph or the referenced
  * node has no string `name`.
  */
val publishedBy: JArr => mutable.LinkedHashMap[String, JValue] => Option[String] =
  graph =>
    resource =>
      for {
        publisher <- getBlankNodeContent(graph)(resource)("publishedBy")
        publisherName <- stringValue(publisher)("name")
      } yield publisherName
/** Name of the node that the resource references through its `P60556` property.
  *
  * `None` when the reference cannot be resolved in the graph or the referenced
  * node has no string `name`.
  */
val placeOfCapture: JArr => mutable.LinkedHashMap[String, JValue] => Option[String] =
  graph =>
    resource =>
      for {
        place <- getBlankNodeContent(graph)(resource)("P60556")
        placeName <- stringValue(place)("name")
      } yield placeName
/** Name of the node that the resource references through `spatial`.
  *
  * `None` when the reference cannot be resolved in the graph or the referenced
  * node has no string `name`.
  */
val spatial: JArr => mutable.LinkedHashMap[String, JValue] => Option[String] =
  graph =>
    resource =>
      for {
        location <- getBlankNodeContent(graph)(resource)("spatial")
        locationName <- stringValue(location)("name")
      } yield locationName
/** Name of the node that the resource references through `hasLanguage`.
  *
  * `None` when the reference cannot be resolved in the graph or the referenced
  * node has no string `name`.
  */
val hasLanguage: JArr => mutable.LinkedHashMap[String, JValue] => Option[String] =
  graph =>
    resource =>
      for {
        language <- getBlankNodeContent(graph)(resource)("hasLanguage")
        languageName <- stringValue(language)("name")
      } yield languageName
/** Collects the creators of a resource as pairs of
  * (`name` of the creation relation, `name` of the relation's target).
  *
  * Creation relations are the graph nodes referenced through
  * `recordResourceOrInstantiationIsSourceOfCreationRelation`. A relation whose
  * `creationRelationHasTarget` cannot be resolved, or whose target has no
  * string `name`, is left out. A resolved relation without a string `name` of
  * its own makes the lookup throw.
  */
//noinspection ScalaStyle
val resourceCreator: JArr => mutable.LinkedHashMap[String, JValue] => List[(String, String)] =
  graph =>
    resource => {
      val relations = getBlankNodesContent(graph)(resource)(
        "recordResourceOrInstantiationIsSourceOfCreationRelation"
      )
      relations
        .flatMap { relation =>
          for {
            target <- getBlankNodeContent(graph)(relation)("creationRelationHasTarget")
            targetName <- stringValue(target)("name")
          } yield (relation("name").str, targetName)
        }
        .toList
    }
/** Name of the node that the resource references through its `P60441` property.
  *
  * `None` when the reference cannot be resolved in the graph or the referenced
  * node has no string `name`.
  */
val producer: JArr => mutable.LinkedHashMap[String, JValue] => Option[String] =
  graph =>
    resource =>
      for {
        producingAgent <- getBlankNodeContent(graph)(resource)("P60441")
        agentName <- stringValue(producingAgent)("name")
      } yield agentName