Commit 3d3c7a94 authored by Jonas Waeber

initial commit

parents
# Gradle caches, IDE metadata and build outputs that are excluded.
# NOTE(review): presumably the .dockerignore of the image build — confirm the file name.
.gradle
.idea
build
out
# IDE metadata: ignore everything below .idea except the shared copyright profile.
# Git cannot re-include a path whose parent directory is itself excluded, so the
# directory's contents are ignored (".idea/*") instead of the directory (".idea");
# otherwise the "!.idea/copyright" exception would never take effect.
*.iml
.idea/*
!.idea/copyright
# Gradle build outputs and caches.
build
.gradle
\ No newline at end of file
# Pipeline-wide variables.
# NOTE(review): the empty DOCKER_TLS_CERTDIR is presumably there so that the
# docker:dind service runs without TLS — confirm against the runner setup.
variables:
  DOCKER_TLS_CERTDIR: ""

# Stages in execution order.
stages: [test, publish]
# Runs the Gradle test task in the gradle:6.3-jdk8 image on runners tagged "mbr".
# The --tests filter limits the run to tests matching "org.memobase.Test" and
# --fail-fast stops at the first failure.
test:
  stage: test
  image: gradle:6.3-jdk8
  tags: [mbr]
  script:
    - gradle --no-daemon --no-scan --no-build-cache test --fail-fast --tests "org.memobase.Test"
# Hidden template job (leading dot): logs in to a registry, builds the image from
# $DOCKERFILE, pushes it as $IMAGE_TAG and logs out again. Jobs that extend it
# supply REGISTRY, REGISTRY_USER, REGISTRY_PASSWORD, IMAGE_TAG and DOCKERFILE.
.build-image:
  stage: publish
  image: docker:stable
  services:
    - docker:dind
  script:
    # Pass the password on stdin so it does not show up in the process list;
    # docker itself warns that `-p` on the command line is insecure.
    - echo "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"
    - docker build --pull -t "$IMAGE_TAG" -f "$DOCKERFILE" .
    - docker push "$IMAGE_TAG"
    # Name the registry explicitly: a bare `docker logout` targets the daemon's
    # default registry, not the one logged in to above.
    - docker logout "$REGISTRY"
# Builds and pushes the image tagged with the Git tag; runs for tag pipelines only.
build-tagged-image:
  extends: .build-image
  only: [tags]
  variables:
    DOCKERFILE: "Dockerfile"
    IMAGE_TAG: "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG"
    REGISTRY: "$CI_REGISTRY"
    REGISTRY_USER: "$CI_REGISTRY_USER"
    REGISTRY_PASSWORD: "$CI_REGISTRY_PASSWORD"
# Builds and pushes the image as ":latest"; runs for the master branch only.
build-latest-image:
  extends: .build-image
  only: [master]
  variables:
    DOCKERFILE: "Dockerfile"
    IMAGE_TAG: "$CI_REGISTRY_IMAGE:latest"
    REGISTRY: "$CI_REGISTRY"
    REGISTRY_USER: "$CI_REGISTRY_USER"
    REGISTRY_PASSWORD: "$CI_REGISTRY_PASSWORD"
# Builds and pushes an image for every ref that is neither master nor a tag.
build-feature-branch-image:
  extends: .build-image
  variables:
    # Use the slug, not the raw ref name: branch names such as "feature/foo"
    # contain characters that are not allowed in a Docker tag, whereas
    # CI_COMMIT_REF_SLUG is lower-cased and restricted to [0-9a-z-].
    IMAGE_TAG: "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG"
    REGISTRY_PASSWORD: "$CI_REGISTRY_PASSWORD"
    REGISTRY_USER: "$CI_REGISTRY_USER"
    REGISTRY: "$CI_REGISTRY"
    DOCKERFILE: "Dockerfile"
  except:
    - master
    - tags
# Lints the Helm chart in chart/ during the test stage.
test-chart:
  stage: test
  image: dtzar/helm-kubectl:3.2.0
  tags: [mbr]
  script:
    - helm lint chart/
# Publishes the Helm chart to the GitLab registry as "<tag>-chart"; tag pipelines only.
publish-chart:
  stage: publish
  image: dtzar/helm-kubectl:3.2.0
  tags:
    - mbr
  script:
    # OCI registry support is behind this flag in the Helm version used here.
    - export HELM_EXPERIMENTAL_OCI=1
    # Pass the password on stdin so it does not appear on the command line.
    - echo "$CI_REGISTRY_PASSWORD" | helm registry login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
    # Stamps $CI_COMMIT_TAG into chart/Chart.yaml and chart/values.yaml.
    - ./insert_chart_versions.sh
    - helm chart save chart/ "$CI_REGISTRY/$CI_PROJECT_PATH:$CI_COMMIT_TAG-chart"
    - helm chart push "$CI_REGISTRY/$CI_PROJECT_PATH:$CI_COMMIT_TAG-chart"
    - helm registry logout "$CI_REGISTRY"
  only:
    - tags
\ No newline at end of file
# Stage "build": compile the service and unpack its distribution archive
# (the Gradle build names the archive app.tar).
FROM gradle:6.3-jdk8 AS build
# COPY instead of ADD: only a plain copy of the build context is needed; ADD's
# extra behaviours (archive auto-extraction, URL download) are not wanted here.
COPY . /
WORKDIR /
RUN gradle --no-daemon --no-scan --no-build-cache distTar
RUN cd /build/distributions && tar xf app.tar

# Runtime stage: contains only the unpacked distribution.
FROM openjdk:8-jre-alpine
# Refer to the build stage by name so that adding a stage cannot silently
# change which stage the files come from.
COPY --from=build /build/distributions/app /app
# Exec form: the start script becomes PID 1 and receives SIGTERM directly. The
# shell form wraps the command in `/bin/sh -c`, which does not forward signals,
# so the service could not shut down cleanly when the container is stopped.
CMD ["/app/bin/drupal-sync-service"]
This diff is collapsed.
## Search Doc Service
\ No newline at end of file
// Build plugins: application start scripts and distribution archives, Kotlin JVM
// compilation, a Git-derived project version, and ktlint checks.
plugins {
id 'application'
id 'distribution'
id 'org.jetbrains.kotlin.jvm' version '1.3.71'
id 'com.palantir.git-version' version '0.11.0'
id 'org.jlleitschuh.gradle.ktlint' version '9.2.1'
}
group 'org.memobase'
// gitVersion() is supplied by the git-version plugin declared above.
version = gitVersion()
// Entry point used by the application plugin; the jar manifest below names the same class.
mainClassName = 'org.memobase.App'
jar {
manifest {
attributes 'Main-Class': 'org.memobase.App'
}
}
// Java 8 source and bytecode level (matches the jvmTarget of the Kotlin compile tasks).
sourceCompatibility = 1.8
targetCompatibility = 1.8
// Repositories used to resolve dependencies.
// NOTE(review): JCenter and Bintray have since been sunset by JFrog — confirm that
// both still resolve, and move to mavenCentral() / another host if they do not.
repositories {
jcenter()
maven {
url "https://dl.bintray.com/jonas-waeber/memobase"
}
}
// Shared dependency versions referenced from the dependencies block.
ext {
    kafkaV = '2.3.1'
    // Keep Log4j at 2.17.1 or newer: earlier 2.x releases (including the
    // previously used 2.11.2) are affected by the Log4Shell vulnerabilities
    // (CVE-2021-44228 and follow-ups). 2.17.x still supports Java 8.
    log4jV = '2.17.1'
}
// Runtime and test dependencies of the service.
dependencies {
    // https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client
    //compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.1.0'
    implementation 'org.memobase:memobase-service-utilities:1.4.0'
    // Logging Framework
    implementation "org.apache.logging.log4j:log4j-api:${log4jV}"
    implementation "org.apache.logging.log4j:log4j-core:${log4jV}"
    implementation "org.apache.logging.log4j:log4j-slf4j-impl:${log4jV}"
    // Kafka Imports
    //implementation group: 'org.apache.kafka', name: 'kafka-clients', version: kafkaV
    implementation "org.apache.kafka:kafka-streams:${kafkaV}"
    implementation 'org.apache.jena:apache-jena:3.14.0'
    // YAML Parser
    implementation 'org.snakeyaml:snakeyaml-engine:2.1'
    implementation 'com.fasterxml.jackson.core:jackson-databind:2.11.+'
    implementation "com.fasterxml.jackson.module:jackson-module-kotlin:2.11.+"
    implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.11.+'
    // JSON Parser
    implementation 'com.beust:klaxon:5.2'
    implementation 'org.jetbrains.kotlin:kotlin-stdlib-jdk8'
    implementation "org.jetbrains.kotlin:kotlin-script-runtime:1.3.71"
    implementation "org.jetbrains.kotlin:kotlin-reflect:1.3.71"

    // Test dependencies. All of them use `testImplementation`: the `testCompile`
    // configuration is deprecated and was mixed with `testImplementation` here.
    testImplementation 'org.junit.jupiter:junit-jupiter:5.4.2'
    testImplementation 'org.assertj:assertj-core:3.15.0'
    // https://mvnrepository.com/artifact/org.apache.kafka/kafka-streams-test-utils
    testImplementation "org.apache.kafka:kafka-streams-test-utils:${kafkaV}"
}
// Compile main and test Kotlin sources to Java 8 bytecode.
compileKotlin {
kotlinOptions.jvmTarget = "1.8"
}
compileTestKotlin {
kotlinOptions.jvmTarget = "1.8"
}
// Run tests on the JUnit Platform and log passed, skipped and failed events.
test {
useJUnitPlatform()
testLogging {
events "passed", "skipped", "failed"
}
}
// Source set layout. The resource `includes` filters restrict the resources of
// both source sets to *.yml and *.xml files.
sourceSets {
main.kotlin.srcDirs += 'src/main/kotlin'
main.resources.srcDirs = [ "src/main/resources" ]
main.resources.includes = [ "**/*.yml", "**/*.xml"]
test.kotlin.srcDirs += 'src/test/kotlin'
test.resources.srcDirs = [ "src/test/resources" ]
test.resources.includes = [ "**/*.yml", "**/*.xml"]
}
// Give the distribution tarball a fixed name; the Dockerfile unpacks app.tar by that name.
plugins.withType(DistributionPlugin) {
distTar {
archiveFileName = 'app.tar'
}
}
\ No newline at end of file
# Helm chart metadata for the search-doc-service.
apiVersion: v2
type: application
name: search-doc-service
description: >-
  This service transforms input rdf data into a search document for elasticsearch.
# Placeholder versions: the release script substitutes the Git tag into the two
# lines below by exact text match, so their wording must stay as it is.
version: 0.0.0
appVersion: 0.0.0
maintainers:
  - name: Jonas Waeber
    email: jonas.waeber@unibas.ch
# ConfigMap holding the application id and the topic names; every value is
# derived from the deploymentName chart value.
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ .Values.deploymentName }}-app-config"
  namespace: memobase
data:
  APPLICATION_ID: "{{ .Values.deploymentName }}-app"
  TOPIC_IN: "{{ .Values.deploymentName }}-input"
  TOPIC_OUT: "{{ .Values.deploymentName }}-output"
  TOPIC_PROCESS: "{{ .Values.deploymentName }}-reporting"
\ No newline at end of file
# Deployment running one replica of the service container. Environment variables
# come from two ConfigMaps (envFrom); a third ConfigMap is mounted as a volume
# at /configs/mappings/.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ .Values.deploymentName }}-deployment"
  namespace: memobase
  labels:
    app: "{{ .Values.deploymentName }}-deployment"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: "{{ .Values.deploymentName }}-deployment"
  template:
    metadata:
      labels:
        app: "{{ .Values.deploymentName }}-deployment"
        tier: post-processing
    spec:
      restartPolicy: Always
      containers:
        - name: "{{ .Values.deploymentName }}-container"
          image: "{{ .Values.registry }}/{{ .Values.image }}:{{ .Values.tag }}"
          imagePullPolicy: Always
          envFrom:
            - configMapRef:
                name: "{{ .Values.kafkaConfigs }}"
            - configMapRef:
                name: "{{ .Values.deploymentName }}-app-config"
          volumeMounts:
            - name: config-volume
              mountPath: "/configs/mappings/"
      volumes:
        - name: config-volume
          configMap:
            name: "{{ .Values.configMapName }}"
#image values
# registry, image and tag are joined into the container image reference by the
# deployment template.
registry: "cr.gitlab.switch.ch"
image: "memoriav/memobase-2020/services/elastic-services/search-doc-service"
# Default image tag. The release script rewrites this line by exact text match,
# so keep its quoting and spacing unchanged.
tag: "latest"
# Base name for the Deployment, its container and the app ConfigMap.
deploymentName: search-doc-service
# ConfigMap mounted into the container at /configs/mappings/.
configMapName: search-doc-mapping-config
# ConfigMap loaded into the container environment via envFrom
# (presumably the Kafka bootstrap settings — confirm).
kafkaConfigs: prod-kafka-bootstrap-servers
\ No newline at end of file
# Selects the "official" Kotlin code style.
kotlin.code.style=official
\ No newline at end of file
#Tue Apr 07 16:43:20 CEST 2020
# Gradle distribution fetched by the wrapper: version 6.3, "all" archive.
distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip
# Both the unpacked distribution and the downloaded zip are stored under
# GRADLE_USER_HOME in wrapper/dists.
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
#!/usr/bin/env sh
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
# Follow the chain until PRG is no longer a symlink; the link target is taken
# from the text after "-> " in the `ls -ld` output.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
# Remember the caller's directory, resolve APP_HOME to a physical path
# (`pwd -P`), then return to where we started.
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
# Print a warning built from all arguments. The message goes to stderr so that
# diagnostics never mix into output a caller captures from stdout.
warn () {
    echo "$*" >&2
}
# Print an error message (framed by blank lines) and terminate with status 1.
# The message goes to stderr, the conventional stream for fatal errors, so it
# is still visible when stdout is redirected or captured.
die () {
    {
        echo
        echo "$*"
        echo
    } >&2
    exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
# Set exactly one of the flags above according to the `uname` output.
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
# The wrapper jar is the only classpath entry needed to start the wrapper.
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
# No JAVA_HOME: fall back to whatever `java` is found on PATH.
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
# (Skipped on Cygwin, Darwin and NonStop.)
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
# (an alternation of the top-level directories found under /).
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
# Each argument is stored in a numbered variable args0, args1, ...; only
# arguments that match the path pattern and do not start with "-" are converted.
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
# Rebuild the positional parameters from the numbered variables. The case
# below only has branches for zero to nine arguments.
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
# Prints every argument wrapped in single quotes, with embedded single quotes
# rewritten as '\'' and each argument followed by " \", then a final line with a
# single space. The result is meant to be re-read by the `eval set --` below.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
# Keep a quoted copy of the user's arguments before the positional parameters
# are replaced below.
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
# Replace this shell with the JVM so no wrapper process stays in between.
exec "$JAVACMD" "$@"
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
@rem DIRNAME is the directory of this script (%~dp0); it becomes APP_HOME.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
@rem No JAVA_HOME: probe for java.exe on the PATH by running it once.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the executable path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
@rem NOTE(review): the label targeted by this check follows immediately, so
@rem both outcomes continue at the same place.
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
@rem NOTE(review): _SKIP is not referenced anywhere else in this script.
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
#!/usr/bin/env bash
# Stamps the release version into the Helm chart before it is packaged.
#
# Reads the version from the CI_COMMIT_TAG environment variable and rewrites the
# placeholder version and appVersion in ./chart/Chart.yaml and the default image
# tag in ./chart/values.yaml. Must be run from the repository root.

# Stop at the first failing command or unset variable instead of writing a
# half-updated chart.
set -euo pipefail

# Without a tag the substitutions below would write empty versions into the chart.
: "${CI_COMMIT_TAG:?CI_COMMIT_TAG must be set to the release tag}"

# The pipeline calls this script without arguments, so report the tag that is
# actually applied rather than the (empty) first argument.
echo "Creating release: $CI_COMMIT_TAG"

# Dots are escaped so that only the literal placeholder version is matched.
sed -i "s/version: 0\.0\.0/version: $CI_COMMIT_TAG/g" ./chart/Chart.yaml
sed -i "s/appVersion: 0\.0\.0/appVersion: $CI_COMMIT_TAG/g" ./chart/Chart.yaml
# Keep the value quoted: an unquoted tag such as 1.0 would be read by YAML as a
# number and rendered differently from the tag that was pushed.
sed -i "s/tag: \"latest\"/tag: \"$CI_COMMIT_TAG\"/g" ./chart/values.yaml
\ No newline at end of file
// NOTE(review): the Dockerfile launches /app/bin/drupal-sync-service, which is
// presumably derived from this project name — keep the two in sync when renaming.
rootProject.name = 'drupal-sync-service'
/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import kotlin.system.exitProcess
import org.apache.logging.log4j.LogManager
/**
 * Command-line entry point of the service.
 */
class App {
    companion object {
        private val log = LogManager.getLogger("App")

        /**
         * Runs [Service] and terminates the JVM with exit code 1 if it throws.
         *
         * @param args command-line arguments; not used.
         */
        @JvmStatic
        fun main(args: Array<String>) {
            try {
                Service().run()
            } catch (ex: Exception) {
                // Hand the throwable to the logger so the stack trace goes through the
                // configured appenders instead of being written straight to stderr.
                log.error("Stopping application due to error: ${ex.message}", ex)
                exitProcess(1)
            }
        }
    }
}
/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import com.beust.klaxon.Klaxon
import com.beust.klaxon.KlaxonException
import java.io.StringReader
import org.apache.kafka.streams.StreamsBuilder
import org.apache.kafka.streams.Topology
import org.apache.logging.log4j.LogManager
import org.memobase.settings.SettingsLoader
class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("StreamsProcessing")
fun build(): Topology {
val builder = StreamsBuilder()
val stream = builder.stream<String, String>(settings.inputTopic)
stream
.flatMapValues { value -> parseJson(value) }
.mapValues { value -> transformJson(value) }
.to(settings.outputTopic)