Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
postprocessing
rico-edm-transformer
Commits
12d7ab0f
Commit
12d7ab0f
authored
Mar 09, 2021
by
Günter Hipler
Browse files
first EDM document we can use to talk about
parent
cb716f4a
Pipeline
#22995
passed with stages
in 6 minutes and 31 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
gh/fragen.bei.erstellung.md
View file @
12d7ab0f
...
...
@@ -58,7 +58,15 @@ und mit Daniel/Silvia abstimmen, wie diese Zusammenhänge nach Europeana übertr
-
für Silvia sind auch noch Punkte offen
-
jetzt nutze ich isHeldBy und isPartof für institution und recordset
-
Im Moment nutze ich die Informationen aus heldBy und isPartOf für die recordset- und institution-Einträge im ES-Dokument.
Diese Informationen können dann für Sets in der OAI-Schnittstelle verwendet werden.
#### owl:sameAs (CHO object)
-
welcher link soll genommen werden?
#### skos:prefLabel
Ist es korrekt, dafür den Extractor Extractors.dcTermsSpatial(graph)(record.value) zu verwenden?
## allgemeine Fragen
...
...
src/main/scala/ch/memobase/edm/EDM.scala
View file @
12d7ab0f
...
...
@@ -21,7 +21,7 @@
package
ch.memobase.edm
import
ch.memobase.edm.subjects.
{
ModelXMLTransformer
,
ProvidedCHO
,
WebResource
}
import
ch.memobase.edm.subjects.
{
Aggregation
,
Concept
,
ModelXMLTransformer
,
Place
,
ProvidedCHO
,
TimeSpan
,
WebResource
}
import
java.time.format.DateTimeFormatter
import
scala.collection.mutable
...
...
@@ -29,7 +29,6 @@ import scala.collection.mutable.ArrayBuffer
import
scala.util.Try
import
ujson.
{
Arr
=>
JArr
,
Obj
=>
JObj
}
import
scala.jdk.CollectionConverters._
class
EDM
{
...
...
@@ -51,9 +50,19 @@ class EDM {
val
choExtraction
=
createChoObject
(
graph
,
record
.
get
,
digitalObject
)
val
webExtraction
=
createWebResources
(
graph
,
record
.
get
,
digitalObject
)
//actually only one instance - correct?
val
placeExtraction
:
ExtractionResult
[
Place
]
=
createPlace
(
graph
,
record
.
get
,
digitalObject
)
val
aggregationExtraction
:
ExtractionResult
[
Aggregation
]
=
createAggregation
(
graph
,
record
.
get
,
digitalObject
)
val
conceptExtraction
:
ExtractionResult
[
Concept
]
=
createConcept
(
graph
,
record
.
get
,
digitalObject
)
val
timespanExtraction
:
ExtractionResult
[
TimeSpan
]
=
createTimeSpan
(
graph
,
record
.
get
,
digitalObject
)
webExtraction
.
obj
.
foreach
(
webResource
=>
choExtraction
.
obj
.
getModel
.
addAll
(
webResource
.
getModel
)
)
choExtraction
.
obj
.
getModel
.
addAll
(
placeExtraction
.
obj
.
getModel
)
//choExtraction.obj.getModel.addAll(aggregationExtraction.obj.getModel)
//choExtraction.obj.getModel.addAll(conceptExtraction.obj.getModel)
//choExtraction.obj.getModel.addAll(timespanExtraction.obj.getModel)
val
esObject
=
ModelXMLTransformer
(
model
=
choExtraction
.
obj
.
getModel
,
id
=
recordId
,
...
...
@@ -63,15 +72,12 @@ class EDM {
.
map
(
identInstitution
=>
EDM
.
getInstitutionOrRecordsetIdent
(
identInstitution
))
)
val
result
=
ExtractionResult
((
//todo: by now we do not collect any infos and warnings
ExtractionResult
((
recordId
,
esObject
)
,
new
ArrayBuffer
[
String
]())
result
,
new
ArrayBuffer
[
String
]())
}
}
...
...
@@ -159,7 +165,8 @@ class EDM {
Extractors
.
dcTermsIssuedTemporal
(
graph
)(
record
.
value
)(
"temporal"
)
.
foreach
(
c
=>
cho
.
addDcTermsTemporal
(
Some
(
c
)))
Extractors
.
edmType
(
record
.
value
)
.
foreach
(
c
=>
cho
.
addEdmType
(
Some
(
c
)))
ExtractionResult
(
cho
)
...
...
@@ -173,13 +180,65 @@ class EDM {
ExtractionResult
(
webresource
.
getOrElse
(
List
())
)
}
/**
 * Builds the Place subject for a record.
 *
 * The place uses the record id as its subject IRI and receives one
 * skos:prefLabel per value produced by `Extractors.dcTermsSpatial`.
 *
 * @param graph         the JSON-LD graph the spatial values are resolved against
 * @param record        the record object; its id becomes the place id
 * @param digitalObject not read by this method
 * @return the populated place wrapped in an ExtractionResult
 */
def createPlace(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Place] = {
  // Open question from the original author: how many place instances can one record have?
  // NOTE(review): `.get` fails if no record id can be extracted - confirm callers guarantee one.
  val subjectId = Extractors.recordId(record).get
  val edmPlace = new Place(subjectId)

  val spatialLabels = Extractors.dcTermsSpatial(graph)(record.value)
  for (label <- spatialLabels) {
    edmPlace.addSkosPrefLabel(Some(label))
  }

  ExtractionResult(edmPlace)
}
/**
 * Builds a Concept subject for a record.
 *
 * Currently a stub: the concept uses the record id as its subject IRI and
 * carries a single fixed placeholder skos:prefLabel.
 *
 * @param graph         not read by this method
 * @param record        the record object; its id becomes the concept id
 * @param digitalObject not read by this method
 * @return the concept wrapped in an ExtractionResult
 */
def createConcept(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Concept] = {
  // Open questions from the original author: how many concept instances are there per record,
  // and is the record id the right identifier for them?
  // NOTE(review): `.get` fails if no record id can be extracted.
  val subjectId = Extractors.recordId(record).get
  val skosConcept = new Concept(subjectId)

  val placeholderLabel = "only test for concept"
  skosConcept.addSkosPrefLabel(Some(placeholderLabel))

  ExtractionResult(skosConcept)
}
/**
 * Builds an Aggregation subject for a record.
 *
 * Currently a stub: the aggregation uses the record id as its subject IRI and
 * carries a single fixed placeholder skos:prefLabel.
 *
 * @param graph         not read by this method
 * @param record        the record object; its id becomes the aggregation id
 * @param digitalObject not read by this method
 * @return the aggregation wrapped in an ExtractionResult
 */
def createAggregation(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[Aggregation] = {
  // Open questions from the original author: how many aggregation instances are there per
  // record, and is the record id the right identifier for them?
  // NOTE(review): `.get` fails if no record id can be extracted.
  val subjectId = Extractors.recordId(record).get
  val oreAggregation = new Aggregation(subjectId)

  val placeholderLabel = "only test for aggregation"
  oreAggregation.addSkosPrefLabel(Some(placeholderLabel))

  ExtractionResult(oreAggregation)
}
/**
 * Builds a TimeSpan subject for a record.
 *
 * Currently a stub: the time span uses the record id as its subject IRI and
 * carries a single fixed placeholder skos:prefLabel.
 *
 * @param graph         not read by this method
 * @param record        the record object; its id becomes the time span id
 * @param digitalObject not read by this method
 * @return the time span wrapped in an ExtractionResult
 */
def createTimeSpan(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[TimeSpan] = {
  // Open questions from the original author: how many time span instances are there per
  // record, and is the record id the right identifier for them?
  // NOTE(review): `.get` fails if no record id can be extracted.
  val subjectId = Extractors.recordId(record).get
  val edmTimeSpan = new TimeSpan(subjectId)

  val placeholderLabel = "only test for timespan"
  edmTimeSpan.addSkosPrefLabel(Some(placeholderLabel))

  ExtractionResult(edmTimeSpan)
}
/*
todo: my assumption: recordset and institution have cardinality n
*/
def
extractRecordset
(
graph
:
JArr
,
record
:
JObj
)
:
List
[
String
]
=
???
//
def extractRecordset(graph: JArr, record: JObj): List[String] = ???
def
extractInstitution
(
graph
:
JArr
,
record
:
JObj
)
:
List
[
String
]
=
???
//
def extractInstitution(graph: JArr, record: JObj): List[String] = ???
}
...
...
src/main/scala/ch/memobase/edm/Extractors.scala
View file @
12d7ab0f
...
...
@@ -156,6 +156,23 @@ object Extractors {
stringValue
(
v
)(
"name"
)
)
// TODO: this is only a quick, hard-coded mapping.
// Open question: do we need some kind of configuration for it, or does one already exist?
//
// Keys are lower-case type names; values are the strings emitted as the EDM type.
// The "tonbildschau" entry deliberately maps to an explanatory placeholder text
// because no target type has been decided for it yet.
private val edmTypeMapping: Map[String, String] = Map(
  ("film", "VIDEO"),
  ("foto", "IMAGE"),
  ("radio", "SOUND"),
  ("ton", "SOUND"),
  ("tonbildschau", "no mapping for audio-visual show"),
  ("tv", "VIDEO"),
  ("video", "VIDEO")
)
/**
 * Derives the EDM type string for a record.
 *
 * Reads the record's "type" string value, lower-cases it and looks it up in
 * `edmTypeMapping`. An unmapped type yields an explanatory placeholder text
 * instead of a mapped value; a record without a "type" value yields None.
 */
val edmType: mutable.LinkedHashMap[String, JValue] => Option[String] = { record =>
  for (t <- stringValue(record)("type"))
    yield edmTypeMapping.getOrElse(t.toLowerCase, s"no mapping for ${t} as rico:type")
}
val
publishedByGH
:
JArr
=>
mutable
.
LinkedHashMap
[
String
,
JValue
]
=>
List
[
String
]
=
graph
=>
...
...
src/main/scala/ch/memobase/edm/subjects/ModelXMLTransformer.scala
View file @
12d7ab0f
...
...
@@ -23,6 +23,7 @@ package ch.memobase.edm.subjects
import
ch.memobase.rdf.writer.RdfXmlWriter
import
org.eclipse.rdf4j.model.Model
import
org.eclipse.rdf4j.rio.Rio
import
org.eclipse.rdf4j.rio.rdfxml.RDFXMLWriter
import
java.io.StringWriter
import
java.time.ZonedDateTime
...
...
@@ -43,7 +44,9 @@ object ModelXMLTransformer {
)
:
String
=
{
val
sOut
=
new
StringWriter
//we use the adapted Writer for EDM
val
rdfWriter
=
new
RdfXmlWriter
(
sOut
)
//val rdfWriter = new RDFXMLWriter(sOut)
Rio
.
write
(
model
,
rdfWriter
)
//create whole ES structure and replace XML prolog
...
...
src/main/scala/ch/memobase/edm/subjects/ProvidedCHO.scala
View file @
12d7ab0f
...
...
@@ -21,18 +21,18 @@
package
ch.memobase.edm.subjects
import
ch.memobase.rdf.Helper
import
ch.memobase.rdf.vocabularies.EDM
import
ch.memobase.rdf.vocabularies.
{
EDM
Vocab
,
OreVocab
}
import
org.eclipse.rdf4j.model.Model
import
org.eclipse.rdf4j.model.impl.SimpleValueFactory
import
org.eclipse.rdf4j.model.util.Values.iri
import
org.eclipse.rdf4j.model.vocabulary.
{
DC
,
DCTERMS
,
RDF
}
import
org.eclipse.rdf4j.model.vocabulary.
{
DC
,
DCTERMS
,
RDF
,
SKOS
}
class
ProvidedCHO
(
val
id
:
String
)
{
import
org.eclipse.rdf4j.model.util.Values.iri
//import org.eclipse.rdf4j.model.util.Values.literal
private
val
model
=
Helper
.
getModelWithEDMNamespaces
model
.
add
(
iri
(
id
),
RDF
.
TYPE
,
EDM
.
ProvidedCHO
)
model
.
add
(
iri
(
id
),
RDF
.
TYPE
,
EDM
Vocab
.
ProvidedCHO
)
private
val
factory
=
SimpleValueFactory
.
getInstance
()
...
...
@@ -92,17 +92,83 @@ class ProvidedCHO (val id: String) {
/**
 * Adds a dcterms:temporal literal for this CHO.
 *
 * @param temporal the temporal value; None leaves the model untouched
 */
def addDcTermsTemporal(temporal: Option[String]): Unit =
  // foreach, not map: the statement is added purely for its side effect on the model,
  // so there is no result worth building and discarding.
  temporal.foreach(t => model.add(iri(id), DCTERMS.TEMPORAL, factory.createLiteral(t)))
/**
 * Adds an edm:type literal for this CHO.
 *
 * @param edmtype the type value; None leaves the model untouched
 */
def addEdmType(edmtype: Option[String]): Unit =
  // foreach, not map: the statement is added purely for its side effect on the model,
  // so there is no result worth building and discarding.
  edmtype.foreach(t => model.add(iri(id), EDMVocab.TYPE, factory.createLiteral(t)))
// Returns the RDF model held by this ProvidedCHO.
// This is the instance itself, not a copy, so statements a caller adds to it
// become part of this CHO's model.
def
getModel
:
Model
=
model
}
class
WebResource
(
val
id
:
String
)
{
private
val
model
=
Helper
.
getModelWithEDMNamespaces
model
.
add
(
iri
(
id
),
RDF
.
TYPE
,
EDM
.
WebResource
)
private
val
factory
=
SimpleValueFactory
.
getInstance
()
model
.
add
(
iri
(
id
),
RDF
.
TYPE
,
EDMVocab
.
WebResource
)
def
getModel
:
Model
=
model
}
/**
 * RDF subject typed as ore:Aggregation.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the rdf:type statement for the given id.
 *
 * @param id IRI string used as the subject of every statement of this aggregation
 */
class Aggregation(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  private val factory = SimpleValueFactory.getInstance()

  // Subject IRI built once from the constructor id and reused for every statement.
  private val subject = iri(id)

  model.add(subject, RDF.TYPE, OreVocab.AGGREGATION)

  /**
   * Adds a skos:prefLabel literal to this aggregation.
   *
   * @param skospreflabel the label; None leaves the model untouched
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach, not map: the statement is added purely for its side effect.
    skospreflabel.foreach(label => model.add(subject, SKOS.PREF_LABEL, factory.createLiteral(label)))

  /** Returns the model holding this aggregation's statements (the instance itself, not a copy). */
  def getModel: Model = model
}
class
Aggregation
/**
 * RDF subject typed as skos:Concept.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the rdf:type statement for the given id.
 *
 * @param id IRI string used as the subject of every statement of this concept
 */
class Concept(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  private val factory = SimpleValueFactory.getInstance()

  // Subject IRI built once from the constructor id and reused for every statement.
  private val subject = iri(id)

  model.add(subject, RDF.TYPE, SKOS.CONCEPT)

  /**
   * Adds a skos:prefLabel literal to this concept.
   *
   * @param skospreflabel the label; None leaves the model untouched
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach, not map: the statement is added purely for its side effect.
    skospreflabel.foreach(label => model.add(subject, SKOS.PREF_LABEL, factory.createLiteral(label)))

  /** Returns the model holding this concept's statements (the instance itself, not a copy). */
  def getModel: Model = model
}
/**
 * RDF subject typed as edm:TimeSpan.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the rdf:type statement for the given id.
 *
 * Original author's note: not used so far in Silvia's mapping.
 *
 * @param id IRI string used as the subject of every statement of this time span
 */
class TimeSpan(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  private val factory = SimpleValueFactory.getInstance()

  // Subject IRI built once from the constructor id and reused for every statement.
  private val subject = iri(id)

  model.add(subject, RDF.TYPE, EDMVocab.TIMESPAN)

  /**
   * Adds a skos:prefLabel literal to this time span.
   *
   * @param skospreflabel the label; None leaves the model untouched
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach, not map: the statement is added purely for its side effect.
    skospreflabel.foreach(label => model.add(subject, SKOS.PREF_LABEL, factory.createLiteral(label)))

  /** Returns the model holding this time span's statements (the instance itself, not a copy). */
  def getModel: Model = model
}
/**
 * RDF subject typed as edm:Place.
 *
 * On construction the model obtained from `Helper.getModelWithEDMNamespaces`
 * receives the rdf:type statement for the given id.
 *
 * Original author's note: not used so far in Silvia's mapping.
 *
 * @param id IRI string used as the subject of every statement of this place
 */
class Place(val id: String) {

  private val model = Helper.getModelWithEDMNamespaces
  private val factory = SimpleValueFactory.getInstance()

  // Subject IRI built once from the constructor id and reused for every statement.
  private val subject = iri(id)

  model.add(subject, RDF.TYPE, EDMVocab.PLACE)

  /**
   * Adds a skos:prefLabel literal to this place.
   *
   * @param skospreflabel the label; None leaves the model untouched
   */
  def addSkosPrefLabel(skospreflabel: Option[String]): Unit =
    // foreach, not map: the statement is added purely for its side effect.
    skospreflabel.foreach(label => model.add(subject, SKOS.PREF_LABEL, factory.createLiteral(label)))

  /** Returns the model holding this place's statements (the instance itself, not a copy). */
  def getModel: Model = model
}
src/main/scala/ch/memobase/rdf/vocabularies/VocabularyFactory.scala
View file @
12d7ab0f
...
...
@@ -29,11 +29,22 @@ sealed abstract class VocabularyFactory(val NAMESPACE: String) {
}
object
EDM
extends
VocabularyFactory
(
"http://www.europeana.eu/schemas/edm/"
)
{
/**
 * IRIs of the Europeana Data Model namespace used by the transformer.
 *
 * Every constant is created through `getIri` with the local name shown.
 */
object EDMVocab extends VocabularyFactory("http://www.europeana.eu/schemas/edm/") {

  // Classes
  val ProvidedCHO: IRI = getIri("ProvidedCHO")
  val WebResource: IRI = getIri("WebResource")
  val TIMESPAN: IRI = getIri("TimeSpan")
  val PLACE: IRI = getIri("Place")

  // Properties
  val TYPE: IRI = getIri("type")
}
/**
 * IRIs of the OAI-ORE terms namespace used by the transformer.
 */
object OreVocab extends VocabularyFactory("http://www.openarchives.org/ore/terms/") {

  /** IRI created through `getIri` with the local name "Aggregation". */
  val AGGREGATION: IRI = getIri("Aggregation")
}
src/test/scala/ch/memobase/edm/CHOSpec.scala
View file @
12d7ab0f
...
...
@@ -316,6 +316,13 @@ class CHOSpec extends AnyFunSuite with Matchers{
}
// The type of the main record in the contributor fixture must be mapped to "SOUND".
test("edm type") {
  val jsonLdGraph = Extractors.jsonGraph(ricoContributorP60441).get.arr
  val mainRecord = Extractors.record(jsonLdGraph).get
  val mappedType = Extractors.edmType(mainRecord)

  assert(mappedType.contains("SOUND"))
}
}
src/test/scala/ch/memobase/edm/EDMSpec.scala
0 → 100644
View file @
12d7ab0f
/*
* rico2edm
* Copyright (C) 2021 UB Basel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package
ch.memobase.edm
import
ch.memobase.Utils.loadFile
import
ch.memobase.edm.subjects.Aggregation
import
ch.memobase.rdf.writer.RdfXmlWriter
import
org.eclipse.rdf4j.rio.Rio
import
org.scalatest.funsuite.AnyFunSuite
import
org.scalatest.matchers.should.Matchers
import
java.io.StringWriter
import
scala.util.
{
Failure
,
Success
,
Try
}
/**
 * Smoke tests for the EDM creation: they run the transformation on a fixture
 * and print the result for manual inspection.
 */
class EDMSpec extends AnyFunSuite with Matchers {

  // Loaded lazily on first use by one of the tests below.
  private lazy val ricoWithContributors = loadFile("src/test/resources/raw.contributor.json")

  test("complete EDM creation") {
    val transformer = new EDM
    val edmCreationResult: Try[ExtractionResult[(String, String)]] =
      transformer.create(ricoWithContributors)

    assert(edmCreationResult.isSuccess)

    // Print the id and the generated document so they can be inspected by hand.
    edmCreationResult match {
      case Failure(exception) =>
        println(exception)
      case Success(value) =>
        val (id, document) = value.obj
        println(id)
        println(document)
    }
  }

  test("aggregation creation") {
    val jsonLdGraph = Extractors.jsonGraph(ricoWithContributors).get.arr
    val mainRecord = Extractors.record(jsonLdGraph)
    val recordId = Extractors.recordId(mainRecord.get).get

    val aggregation = new Aggregation(recordId)

    // Serialise the aggregation's model with the project's RDF/XML writer and print it.
    val buffer = new StringWriter
    Rio.write(aggregation.getModel, new RdfXmlWriter(buffer))
    println(buffer.toString)
  }
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment