Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
postprocessing
rico-edm-transformer
Commits
20c2cbc5
Commit
20c2cbc5
authored
Apr 27, 2021
by
Günter Hipler
Browse files
included rules to define which datasets or institutions are allowed for
OAI export
parent
3f9dd3ad
Pipeline
#25564
passed with stages
in 8 minutes and 59 seconds
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
configs/oai_export_definitions/rules.json
0 → 100644
View file @
20c2cbc5
{
"isHeldBy"
:
[],
"isPartOf"
:
[
"apf-001"
,
"apf-002"
]
}
\ No newline at end of file
src/main/resources/app.yml
View file @
20c2cbc5
...
...
@@ -5,6 +5,7 @@ app:
#reuseStatementLabelsPath: "/home/swissbib/environment/code/repositories/memoriav/gitlab/services/postprocessing/rico-edm-transformer/configs/reuse_statements/labels.csv"
isocodemapping
:
${ISOCODE_MAPPING:?system}
institutionscoordinates
:
${INSTITUTIONS_COORDINATES:?system}
exportDefinitionRules
:
${EXPORT_DEFINITION_RULES:?system}
elastic
:
host
:
${ELASTIC_HOST:?system}
port
:
${ELASTIC_PORT:?system}
...
...
src/main/scala/ch/memobase/rico2edm/KafkaTopology.scala
View file @
20c2cbc5
...
...
@@ -44,17 +44,20 @@ class KafkaTopology extends Logging {
reportingTopic
:
String
,
appSettings
:
Properties
)
:
Topology
=
{
val
builder
=
new
StreamsBuilder
val
edmBuilder
=
new
EDM
val
source
=
builder
.
stream
[
String
,
String
](
topicIn
)
//val Array(noDigitalObject, noLocator, noPhoto, isPhoto) = source
// we have to discuss which documents should be delivered to Europeana
val
Array
(
no
DigitalObject
,
noLocat
or
,
isEDMDeliverable
)
=
source
val
Array
(
no
OaiExp
or
t
,
isEDMDeliverable
)
=
source
.
branch
(
(
_
,
v
)
=>
hasNoDigitalObject
(
v
),
(
_
,
v
)
=>
hasNoLocator
(
v
),
(
_
,
v
)
=>
notDefinedForOaiExport
(
v
),
(
_
,
_
)
=>
true
)
...
...
@@ -75,12 +78,13 @@ class KafkaTopology extends Logging {
reportEDMCreationFailure
(
noEDM
,
reportingTopic
)
reportIgnoredRecord
(
no
Locat
or
,
no
OaiExp
or
t
,
reportingTopic
,
"
Digital object has no locat
or"
"
record not defined for oai exp
or
t
"
)
/*
reportIgnoredRecord(
noDigitalObject,
reportingTopic,
...
...
@@ -88,6 +92,8 @@ class KafkaTopology extends Logging {
)
*/
/*
...
...
src/main/scala/ch/memobase/rico2edm/KafkaTopologyUtils.scala
View file @
20c2cbc5
...
...
@@ -21,7 +21,10 @@
package
ch.memobase.rico2edm
import
scala.util.Try
import
ch.memobase.rico2edm.edm.Extractors
import
ch.memobase.rico2edm.edm.
{
EDM
,
Extractors
}
import
ch.memobase.rico2edm.utils.Helper
/**
 * Whitelist that controls which records are allowed for the OAI export.
 *
 * @param institutions identifiers of institutions whose records may be exported
 * @param dataSets     identifiers of record sets whose records may be exported
 */
case class OaiExportRules(
  institutions: List[String],
  dataSets: List[String]
)
object
KafkaTopologyUtils
{
...
...
@@ -45,5 +48,18 @@ object KafkaTopologyUtils {
.
flatMap
(
dO
=>
Try
(
Extractors
.
imageResourceId
(
dO
).
get
))
.
isFailure
/**
 * Tells whether a record has to be kept out of the OAI export.
 *
 * A record is exportable as soon as one of its record-set identifiers
 * (key "isPartOf") or one of its institution identifiers (key "heldBy")
 * is accepted by the export rules held in [[Helper]].
 *
 * The JSON graph and the record are extracted with `.get`; a message from
 * which they cannot be extracted used to make this predicate throw. Such a
 * message is now reported as not exportable instead, so that a single
 * malformed message cannot abort the caller.
 *
 * @param msgVal the serialized message value
 * @return `true` when the record must not be exported (or cannot be read),
 *         `false` when it is covered by the export rules
 */
def notDefinedForOaiExport(msgVal: String): Boolean =
  Try {
    val graph = Extractors.jsonGraph(msgVal).get
    val record = Extractors.record(graph.arr).get

    // `||` keeps the institution lookup lazy: it only runs when no record set matched.
    val allowedForExport =
      Helper.isPartOfSet(
        Extractors.recordSetOrInstitution(record)("isPartOf").map(EDM.getInstitutionOrRecordsetIdent)
      ) ||
        Helper.isPartOfInstitution(
          Extractors.recordSetOrInstitution(record)("heldBy").map(EDM.getInstitutionOrRecordsetIdent)
        )

    !allowedForExport
  }.getOrElse(true) // unreadable record: treat as not defined for export
}
src/main/scala/ch/memobase/rico2edm/Main.scala
View file @
20c2cbc5
...
...
@@ -48,7 +48,8 @@ object Main extends Logging {
Keys
.
ELASTIC_PORT
,
Keys
.
INSTITUTION_INDEX
,
Keys
.
RECORDSET_INDEX
,
Keys
.
ELASTIC_CLUSTERNAME
Keys
.
ELASTIC_CLUSTERNAME
,
Keys
.
EXPORT_DEFINITION_RULES
,
).
asJava
,
"app.yml"
,
false
,
...
...
@@ -56,6 +57,8 @@ object Main extends Logging {
false
,
false
)
val
streams
=
new
KafkaStreams
(
topology
.
build
(
settings
.
getInputTopic
,
...
...
@@ -69,6 +72,9 @@ object Main extends Logging {
val
shutdownGracePeriodMs
=
10000
Helper
.
initOaiExportRules
(
settings
.
getAppSettings
)
Helper
.
initLanguageCodeMapping
(
settings
.
getAppSettings
)
Helper
.
initInstitutionsCoordinateMapping
(
settings
.
getAppSettings
)
ElasticSearchClientWrapper
(
settings
.
getAppSettings
)
...
...
src/main/scala/ch/memobase/rico2edm/utils/Helper.scala
View file @
20c2cbc5
...
...
@@ -21,10 +21,14 @@
package
ch.memobase.rico2edm.utils
import
ch.memobase.rico2edm.OaiExportRules
import
java.io.ByteArrayOutputStream
import
java.util.zip.Deflater
import
java.util.
{
Properties
,
HashMap
=>
JHashMap
}
import
scala.language.reflectiveCalls
import
scala.util.
{
Failure
,
Success
,
Try
}
import
ujson.
{
Obj
=>
JObj
}
object
Helper
{
...
...
@@ -32,6 +36,9 @@ object Helper {
private
var
institutionsCoordinates
:
Option
[
JHashMap
[
String
,
(
String
,
String
)]]
=
None
// OAI export rules; stays None until initOaiExportRules has loaded the rules file.
// Read by isPartOfSet and isPartOfInstitution.
private var exportRules: Option[OaiExportRules] = None
def
compress
(
data
:
Array
[
Byte
])
:
Array
[
Byte
]
=
{
...
...
@@ -54,6 +61,26 @@ object Helper {
}
/**
 * Loads the OAI export rules from the JSON file referenced by the
 * `Keys.EXPORT_DEFINITION_RULES` property and stores them in `exportRules`.
 *
 * The file must contain a JSON object with two arrays of strings:
 * "isHeldBy" (institutions) and "isPartOf" (record sets).
 *
 * @param props application settings holding the path of the rules file
 * @throws Exception when the property is not set, or when the file content
 *                   is not valid JSON of the expected structure (the
 *                   underlying error is attached as cause)
 */
def initOaiExportRules(props: Properties): Unit = {
  // Fail with a descriptive message instead of a bare NullPointerException
  // when the property is missing.
  val rulesPath: String = Option(props.get(Keys.EXPORT_DEFINITION_RULES))
    .map(_.toString)
    .getOrElse(
      throw new Exception(
        s"didn't get any OAI export rules: property '${Keys.EXPORT_DEFINITION_RULES}' is not set"
      )
    )

  using(io.Source.fromFile(rulesPath)) { source =>
    val content = source.getLines().mkString("\n")

    // The whole extraction runs inside Try: besides invalid JSON, a non-object
    // root, a missing key or a non-string entry is reported the same way.
    val parsedRules: OaiExportRules = Try {
      val json: JObj = ujson.read(content).obj
      val institutions: List[String] = json("isHeldBy").arr.toList.map(_.str)
      val sets: List[String] = json("isPartOf").arr.toList.map(_.str)
      OaiExportRules(institutions, sets)
    } match {
      case Success(rules) => rules
      case Failure(exception) =>
        throw new Exception("didn't get any OAI export rules", exception)
    }

    exportRules = Some(parsedRules)
  }
}
def
initLanguageCodeMapping
(
props
:
Properties
)
:
Unit
=
{
...
...
@@ -101,6 +128,14 @@ object Helper {
}
}
/**
 * Checks whether at least one of the given record-set identifiers is listed
 * in the data sets of the loaded export rules.
 *
 * Yields `false` when no export rules have been loaded or the list is empty.
 * The rules are read once per call, so a single call always sees one
 * consistent set of rules.
 */
val isPartOfSet: List[String] => Boolean = candidates =>
  exportRules.exists(rules => candidates.exists(item => rules.dataSets.contains(item)))
/**
 * Checks whether at least one of the given institution identifiers is listed
 * in the institutions of the loaded export rules.
 *
 * Yields `false` when no export rules have been loaded or the list is empty.
 * The rules are read once per call, so a single call always sees one
 * consistent set of rules.
 */
val isPartOfInstitution: List[String] => Boolean = candidates =>
  exportRules.exists(rules => candidates.exists(item => rules.institutions.contains(item)))
def
getInstitutionCoord
(
institutionId
:
String
)
:
Option
[(
String
,
String
)]
=
{
institutionsCoordinates
match
{
case
Some
(
coords
)
if
coords
.
containsKey
(
institutionId
)
=>
...
...
src/main/scala/ch/memobase/rico2edm/utils/Keys.scala
View file @
20c2cbc5
...
...
@@ -30,6 +30,8 @@ object Keys {
val
ELASTIC_PORT
=
"elastic.port"
val
ELASTIC_CLUSTERNAME
=
"elastic.clustername"
val
EXPORT_DEFINITION_RULES
=
"exportDefinitionRules"
val
VIMEO_KEY
=
"VIMEO"
...
...
src/test/resources/exportrules/rules.json
0 → 100644
View file @
20c2cbc5
{
"isHeldBy"
:
[
"ati"
],
"isPartOf"
:
[
"apf-001"
,
"apf-002"
]
}
\ No newline at end of file
src/test/scala/ch/memobase/rico2edm/edm/GeneralSpec.scala
0 → 100644
View file @
20c2cbc5
/*
* rico2edm
* Copyright (C) 2021 UB Basel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package
ch.memobase.rico2edm.edm
import
ch.memobase.Utils.loadFile
import
ch.memobase.rico2edm.KafkaTopologyUtils
import
ch.memobase.rico2edm.utils.
{
Helper
,
Keys
}
import
org.scalatest.funsuite.AnyFunSuite
import
org.scalatest.matchers.should.Matchers
import
java.util.Properties
/**
 * Verifies the OAI export predicate against three fixture records, using the
 * rules stored in src/test/resources/exportrules/rules.json.
 */
class GeneralSpec extends AnyFunSuite with Matchers {

  // Expected to be exportable (presumably through record set "apf-001" - confirm against fixture).
  private lazy val apf001 = loadFile("src/test/resources/apf-001-1280.json")

  // Expected to be rejected by the export rules.
  private lazy val noapf = loadFile("src/test/resources/rico.duration.json")

  // Expected to be exportable (presumably through its institution - confirm against fixture).
  private lazy val definedInstitution = loadFile("src/test/resources/rawIndent.no.contributor.json")

  test("is part of europeana export") {
    val settings = new Properties()
    settings.put(Keys.EXPORT_DEFINITION_RULES, "src/test/resources/exportrules/rules.json")
    Helper.initOaiExportRules(settings)

    KafkaTopologyUtils.notDefinedForOaiExport(apf001) shouldBe false
    KafkaTopologyUtils.notDefinedForOaiExport(noapf) shouldBe true
    KafkaTopologyUtils.notDefinedForOaiExport(definedInstitution) shouldBe false
  }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment