Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
postprocessing
rico-edm-transformer
Commits
909a6414
Commit
909a6414
authored
Feb 01, 2021
by
Günter Hipler
Browse files
more necessary types for the use case
parent
fa9b4349
Pipeline
#20975
failed with stages
in 8 minutes and 33 seconds
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/main/java/ch/memobase/rdf/writer/RdfXmlWriter.java
0 → 100644
View file @
909a6414
package
ch.memobase.rdf.writer
;
import
org.eclipse.rdf4j.common.net.ParsedIRI
;
import
org.eclipse.rdf4j.common.xml.XMLUtil
;
import
org.eclipse.rdf4j.model.*
;
import
org.eclipse.rdf4j.model.util.Literals
;
import
org.eclipse.rdf4j.model.vocabulary.RDF
;
import
org.eclipse.rdf4j.model.vocabulary.XSD
;
import
org.eclipse.rdf4j.rio.RDFHandlerException
;
import
org.eclipse.rdf4j.rio.rdfxml.RDFXMLWriter
;
import
java.io.IOException
;
import
java.io.OutputStream
;
import
java.io.Writer
;
/**
 * RDF/XML writer that extends RDF4J's {@link RDFXMLWriter} and supplies its own
 * {@link #consumeStatement(Statement)} implementation, so the serialisation of
 * individual statements can be adapted in this project.
 */
public class RdfXmlWriter extends RDFXMLWriter {

    /** Creates a writer that serialises to the given output stream. */
    public RdfXmlWriter(OutputStream out) {
        super(out);
    }

    /** Creates a writer for the given output stream; IRIs are relativised against {@code baseIRI}. */
    public RdfXmlWriter(OutputStream out, ParsedIRI baseIRI) {
        super(out, baseIRI);
    }

    /** Creates a writer that serialises to the given character writer. */
    public RdfXmlWriter(Writer writer) {
        super(writer);
    }

    /** Creates a writer for the given character writer; IRIs are relativised against {@code baseIRI}. */
    public RdfXmlWriter(Writer writer, ParsedIRI baseIRI) {
        super(writer, baseIRI);
    }

    /**
     * Writes one statement as a property element nested in an
     * {@code rdf:Description} element for the statement's subject.
     *
     * A new {@code rdf:Description} is opened only when the subject differs from
     * the last written one; it is not closed here, because the next statement may
     * share the same subject.
     *
     * @param st the statement to serialise
     * @throws RDFHandlerException if the predicate cannot be split into an XML
     *         namespace-qualified name, or if writing fails with an {@link IOException}
     */
    @Override
    protected void consumeStatement(Statement st) {
        final Resource subject = st.getSubject();
        final IRI predicate = st.getPredicate();
        final Value object = st.getObject();

        // The predicate must be expressible as an XML qualified name (namespace + local name).
        final String predicateIri = predicate.toString();
        final int splitAt = XMLUtil.findURISplitIndex(predicateIri);
        if (splitAt == -1) {
            throw new RDFHandlerException(
                    "Unable to create XML namespace-qualified name for predicate: " + predicateIri);
        }
        final String namespace = predicateIri.substring(0, splitAt);
        final String localName = predicateIri.substring(splitAt);

        try {
            if (!headerWritten) {
                writeHeader();
            }

            // SUBJECT: start a new rdf:Description when the subject changes.
            if (!subject.equals(lastWrittenSubject)) {
                flushPendingStatements();

                writeNewLine();
                writeStartOfStartTag(RDF.NAMESPACE, "Description");
                emitNodeReference(subject, "about");
                writeEndOfStartTag();
                writeNewLine();

                lastWrittenSubject = subject;
            }

            // PREDICATE
            writeIndent();
            writeStartOfStartTag(namespace, localName);

            // OBJECT
            if (object instanceof Resource) {
                emitNodeReference((Resource) object, "resource");
                writeEndOfEmptyTag();
            } else if (object instanceof Literal) {
                emitLiteralElement((Literal) object, namespace, localName);
            }

            writeNewLine();

            // The closing </rdf:Description> is deliberately not written yet.
        } catch (IOException e) {
            throw new RDFHandlerException(e);
        }
    }

    /**
     * Writes the attribute that identifies {@code node}: {@code rdf:nodeID} for a
     * blank node, otherwise the given RDF attribute ({@code about} / {@code resource})
     * holding the IRI, relativised against the base IRI when one is set.
     */
    private void emitNodeReference(Resource node, String iriAttribute) throws IOException {
        if (node instanceof BNode) {
            BNode blank = (BNode) node;
            writeAttribute(RDF.NAMESPACE, "nodeID", getValidNodeId(blank));
            return;
        }

        final String reference;
        if (baseIRI != null) {
            reference = baseIRI.relativize(node.stringValue());
        } else {
            IRI absolute = (IRI) node;
            reference = absolute.toString();
        }
        writeAttribute(RDF.NAMESPACE, iriAttribute, reference);
    }

    /**
     * Completes an already started property element for a literal object: writes
     * the language or datatype attribute, the label and the end tag.
     */
    private void emitLiteralElement(Literal literal, String namespace, String localName) throws IOException {
        boolean xmlLiteral = false;

        if (Literals.isLanguageLiteral(literal)) {
            // language attribute
            writeAttribute("xml:lang", literal.getLanguage().get());
        } else {
            // datatype attribute; xsd:string is left implicit
            final IRI datatype = literal.getDatatype();
            xmlLiteral = datatype.equals(RDF.XMLLITERAL);
            if (xmlLiteral) {
                writeAttribute(RDF.NAMESPACE, "parseType", "Literal");
            } else if (!datatype.equals(XSD.STRING)) {
                writeAttribute(RDF.NAMESPACE, "datatype", datatype.toString());
            }
        }

        writeEndOfStartTag();

        // label: XML literals are written as plain XML, everything else as character data
        if (xmlLiteral) {
            writer.write(literal.getLabel());
        } else {
            writeCharacterData(literal.getLabel());
        }

        writeEndTag(namespace, localName);
    }
}
src/main/scala/ch/memobase/edm/EDM.scala
View file @
909a6414
...
...
@@ -20,12 +20,18 @@
package
ch.memobase.edm
import
ujson.Value.Value
import
ch.memobase.rdf.Helper
import
ch.memobase.rdf.vocabularies.EDM
import
org.eclipse.rdf4j.model.Model
import
org.eclipse.rdf4j.model.impl.
{
DynamicModelFactory
,
SimpleValueFactory
}
import
org.eclipse.rdf4j.model.vocabulary.
{
DCTERMS
,
RDF
}
import
java.time.format.DateTimeFormatter
import
scala.collection.mutable
import
scala.collection.mutable.ArrayBuffer
import
scala.util.Try
import
ujson.
{
Arr
=>
JArr
,
Obj
=>
JObj
}
class
EDM
{
...
...
@@ -36,8 +42,12 @@ class EDM {
Try
{
val
graph
=
Extractors
.
jsonGraph
(
messageValue
).
get
.
arr
//val digitalObject = Extractors.digitalObject(graph).get
val
record
:
mutable.Map
[
String
,
Value
]
=
Extractors
.
record
(
graph
).
get
val
digitalObject
=
Extractors
.
digitalObject
(
graph
).
get
val
record
=
Extractors
.
record
(
graph
).
get
val
cho
=
createChoObject
(
graph
,
record
,
digitalObject
)
//edmRdfModel.serialize(record.toString)
...
...
@@ -52,7 +62,46 @@ class EDM {
}
}
/**
 * Creates the `ProvidedCHO` for a record and assigns it the record's identifier.
 *
 * @param graph         JSON graph of the message (currently not used in the body)
 * @param record        JSON object of the record; its id is read via `Extractors.recordId`
 * @param digitalObject JSON object of the digital object (currently not used in the body)
 * @return the created object wrapped in an `ExtractionResult` with the default warnings buffer
 * @throws NoSuchElementException if no record id can be extracted
 */
private def createChoObject(graph: JArr, record: JObj, digitalObject: JObj): ExtractionResult[ProvidedCHO] = {
  // Same exception type as Option.get, but with a message that names the actual problem.
  val recordId = Extractors.recordId(record).getOrElse(
    throw new NoSuchElementException("Unable to create ProvidedCHO: the record has no id")
  )
  val cho = new ProvidedCHO
  cho.addId(recordId)
  ExtractionResult(cho)
}
}
/**
 * Result of an extraction step together with the warnings collected for it.
 *
 * @param obj      the extracted object
 * @param warnings mutable buffer of warning messages; a fresh empty buffer by default
 * @tparam T type of the extracted object
 */
case class ExtractionResult[T](
  obj: T,
  warnings: mutable.Buffer[String] = mutable.Buffer.empty[String]
)
/**
 * Mutable holder for the data of a single `edm:ProvidedCHO` resource.
 *
 * The identifier has to be set with [[addId]] before [[getModel]] is called.
 */
class ProvidedCHO {

  import org.eclipse.rdf4j.model.util.Values.iri

  // Identifier of the resource; None until addId has been called with a non-null value.
  private var _id: Option[String] = None

  /**
   * Sets the identifier that is used as subject IRI of the generated statements.
   *
   * @param id IRI string of the resource
   */
  def addId(id: String): Unit = {
    _id = Option(id)
  }

  /**
   * Builds an RDF model describing this resource.
   *
   * The model is obtained from `Helper.getModelWithEDMNamespaces` and receives an
   * `rdf:type edm:ProvidedCHO` statement plus a fixed `dcterms:issued` literal.
   *
   * @return the model containing the statements for this resource
   * @throws IllegalStateException if no id has been set
   */
  def getModel: Model = {
    // Fail with a clear message instead of passing null into iri(...).
    val subject = _id
      .map(id => iri(id))
      .getOrElse(throw new IllegalStateException("ProvidedCHO has no id: call addId before getModel"))

    val factory = SimpleValueFactory.getInstance()
    val model = Helper.getModelWithEDMNamespaces

    model.add(subject, RDF.TYPE, EDM.ProvidedCHO)
    // only for test purposes for now
    model.add(subject, DCTERMS.ISSUED, factory.createLiteral("1993"))
    model
  }
}
// Empty placeholder class: no members are defined yet.
// NOTE(review): presumably intended to model edm:WebResource — confirm.
class
WebResource
// Empty placeholder class: no members are defined yet.
// NOTE(review): presumably intended to model ore:Aggregation — confirm.
class
Aggregation
src/main/scala/ch/memobase/edm/EDMRdf4jModel.scala
View file @
909a6414
...
...
@@ -31,7 +31,8 @@ class EDMRdf4jModel {
private
val
edmModel
=
getModelWithEDMNamespaces
def
serializeModel
(
lang
:
RDFFormat
=
RDFFormat
.
RDFXML
)
:
String
=
{
def
serializeModel
(
providedCho
:
ProvidedCHO
,
lang
:
RDFFormat
=
RDFFormat
.
RDFXML
)
:
String
=
{
val
sw
=
new
StringWriter
Rio
.
write
(
edmModel
,
sw
,
RDFFormat
.
RDFXML
)
...
...
src/main/scala/ch/memobase/edm/Extractors.scala
View file @
909a6414
...
...
@@ -192,4 +192,7 @@ object Extractors {
getBlankNodeContent
(
graph
)(
record
)(
"created"
).
flatMap
(
v
=>
stringValue
(
v
)(
"normalizedDateValue"
)
)
/** Reads the `@id` value of a record object; empty when `stringValue` yields no value. */
val recordId: JObj => Option[String] = record => stringValue(record)("@id")
}
src/main/scala/ch/memobase/rdf/Helper.scala
0 → 100644
View file @
909a6414
/*
* rico2edm
* Copyright (C) 2021 UB Basel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package
ch.memobase.rdf
import
org.eclipse.rdf4j.model.Model
import
org.eclipse.rdf4j.model.util.ModelBuilder
/** Helper functions for building RDF4J models. */
object Helper {

  /**
   * Creates a new model in which the namespace prefixes used for the EDM output
   * are registered. The model contains no statements.
   *
   * Every namespace URI is written with plain ASCII characters only; `wgs84_pos`
   * is bound to the W3C Basic Geo vocabulary and `skos` to the SKOS core namespace.
   *
   * @return a model with the namespace prefixes set
   */
  def getModelWithEDMNamespaces: Model = {
    val builder = new ModelBuilder
    builder
      .setNamespace("dc", "http://purl.org/dc/elements/1.1/")
      .setNamespace("dcterms", "http://purl.org/dc/terms/")
      .setNamespace("edm", "http://www.europeana.eu/schemas/edm/")
      .setNamespace("ore", "http://www.openarchives.org/ore/terms/")
      .setNamespace("owl", "http://www.w3.org/2002/07/owl#")
      .setNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
      .setNamespace("foaf", "http://xmlns.com/foaf/0.1/")
      .setNamespace("skos", "http://www.w3.org/2004/02/skos/core#")
      .setNamespace("wgs84_pos", "http://www.w3.org/2003/01/geo/wgs84_pos#")
      .setNamespace("crm", "http://www.cidoc-crm.org/cidoc-crm/")
      .setNamespace("cc", "http://creativecommons.org/ns#")
      .build()
  }
}
\ No newline at end of file
src/main/scala/ch/memobase/rdf/vocabularies/VocabularyFactory.scala
0 → 100644
View file @
909a6414
/*
* rico2edm
* Copyright (C) 2021 UB Basel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package
ch.memobase.rdf.vocabularies
import
org.eclipse.rdf4j.model.IRI
import
org.eclipse.rdf4j.model.impl.SimpleValueFactory
/**
 * Base class for vocabulary objects whose terms share one namespace.
 *
 * @param NAMESPACE namespace IRI that is prepended to every local name
 */
sealed abstract class VocabularyFactory(val NAMESPACE: String) {

  /**
   * Creates the IRI of a term of this vocabulary.
   *
   * @param localName local name of the term within `NAMESPACE`
   * @return the IRI created by the `SimpleValueFactory` instance from namespace and local name
   */
  def getIri(localName: String): IRI = {
    val valueFactory = SimpleValueFactory.getInstance()
    valueFactory.createIRI(NAMESPACE, localName)
  }
}
/** Terms of the vocabulary in the namespace `http://www.europeana.eu/schemas/edm/`. */
object EDM extends VocabularyFactory("http://www.europeana.eu/schemas/edm/") {

  /** IRI of the term `ProvidedCHO` in this namespace. */
  val ProvidedCHO: IRI = getIri(localName = "ProvidedCHO")
}
src/
main
/resources/raw1L.json
→
src/
test
/resources/raw1L.json
View file @
909a6414
File moved
src/
main
/resources/rawIndent.json
→
src/
test
/resources/rawIndent.json
View file @
909a6414
File moved
src/test/scala/ch/memobase/edm/ExtractorSpec.scala
View file @
909a6414
...
...
@@ -21,6 +21,8 @@
package
ch.memobase.edm
import
ch.memobase.Utils.loadFile
import
ch.memobase.rdf.writer.RdfXmlWriter
import
org.eclipse.rdf4j.rio.
{
RDFFormat
,
Rio
}
import
org.scalatest.funsuite.AnyFunSuite
import
ujson.Value.Value
...
...
@@ -28,7 +30,7 @@ import scala.util.Try
class
ExtractorSpec
extends
AnyFunSuite
{
val
rawRico
:
String
=
{
loadFile
(
"src/
main
/resources/raw1L.json"
)
loadFile
(
"src/
test
/resources/raw1L.json"
)
//Extractors.jsonGraph(content)
//content
}
...
...
@@ -39,7 +41,7 @@ class ExtractorSpec extends AnyFunSuite{
val
rdfModel
=
new
EDMRdf4jModel
//val s = t.get.obj._1
//val resource = rdfModel.createResource("#UEDIN:214")
println
(
rdfModel
.
serializeModel
())
println
(
rdfModel
.
serializeModel
(
new
ProvidedCHO
))
}
...
...
@@ -50,12 +52,22 @@ class ExtractorSpec extends AnyFunSuite{
val
digitalObject
=
Extractors
.
digitalObject
(
graph
).
get
val
record
=
Extractors
.
record
(
graph
).
get
val
imageResourceId
=
Extractors
.
imageResourceId
(
digitalObject
)
//Extractors.manifestId(graph)
println
(
graph
)
println
(
digitalObject
)
println
(
record
)
val
recId
=
Extractors
.
recordId
(
record
)
val
cho
=
new
ProvidedCHO
cho
.
addId
(
Extractors
.
recordId
(
record
).
get
)
//use our own writer so we are able to create special EDM output requirements
val
rdfWriter
=
new
RdfXmlWriter
(
System
.
out
)
Rio
.
write
(
cho
.
getModel
,
rdfWriter
)
//println(graph)
//println(digitalObject)
//println(record)
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment