Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
libraries
Normalizer Service Configuration
Commits
b06ff221
Commit
b06ff221
authored
Oct 20, 2020
by
Jonas Waeber
Browse files
Add unit tests from normalizer
parent
85573306
Changes
9
Hide whitespace changes
Inline
Side-by-side
build.gradle
View file @
b06ff221
...
...
@@ -87,7 +87,7 @@ bintray {
name
=
'normalizer-service-configuration'
repo
=
'memobase'
userOrg
=
'memoriav'
desc
=
'Parser & validation for the
mapp
er service configuration.'
desc
=
'Parser & validation for the
normaliz
er service configuration.'
licenses
=
[
'Apache-2.0'
]
vcsUrl
=
'https://gitlab.switch.ch/memoriav/memobase-2020/libraries/normalizer-service-configuration'
}
...
...
src/test/kotlin/TestDates.kt
0 → 100644
View file @
b06ff221
package
org.memobase
import
ch.memobase.helpers.Date
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.Test
import
org.junit.jupiter.api.TestInstance
/**
 * Unit tests for [Date.validateMonthValue].
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestDates {

    /**
     * Feeds a German month name, an abbreviated French month name, a single-digit
     * numeric month and `null` into [Date.validateMonthValue] and checks the value
     * returned for each of them.
     */
    @Test
    fun `test month symbol map`() {
        val dez = Date.validateMonthValue("Dezember")
        val feb = Date.validateMonthValue("févr.")
        // "5" is the fifth month, so the expected zero-padded value is "05" (May).
        val may = Date.validateMonthValue("5")
        val nullDate = Date.validateMonthValue(null)

        assertThat(dez).isEqualTo("12")
        assertThat(feb).isEqualTo("02")
        assertThat(may).isEqualTo("05")
        // A null input is expected to yield a null result.
        assertThat(nullDate).isNull()
    }
}
src/test/kotlin/TestGlobalTransformLoader.kt
View file @
b06ff221
import
ch.memobase.configs.GlobalTransformsLoader
import
ch.memobase.configs.LocalTransformsLoader
import
java.io.File
import
java.io.FileInputStream
import
java.nio.charset.Charset
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.Test
import
org.junit.jupiter.api.TestInstance
...
...
@@ -17,6 +13,6 @@ class TestGlobalTransformLoader {
val
global
=
GlobalTransformsLoader
(
file
)
val
transforms
=
global
.
get
()
assertThat
(
transforms
)
.
isNotEmpty
.
isNotEmpty
}
}
\ No newline at end of file
src/test/kotlin/TestLanguageNormalizer.kt
0 → 100644
View file @
b06ff221
package
org.memobase
import
ch.memobase.model.NormalizeLanguages
import
ch.memobase.rdf.MemobaseModel
import
org.apache.jena.riot.RDFDataMgr
import
org.apache.jena.riot.RDFFormat
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.Test
import
org.junit.jupiter.api.TestInstance
import
org.junit.jupiter.api.assertAll
import
org.memobase.rdf.NS
import
org.memobase.rdf.RICO
import
java.io.FileOutputStream
/**
 * Test for the transform generated by [NormalizeLanguages].
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
internal class TestLanguageNormalizer {
    /*
        TODO: Find a way to properly test / validate rdf structures. (Use SHACL shapes)
     */

    /**
     * Builds a model containing a record linked to one language resource, runs the
     * generated transform on that language, writes the resulting model as Turtle to
     * the test resources folder and asserts that the transform output is empty.
     */
    @Test
    fun `test language normalizer`() {
        val memobaseModel = MemobaseModel()
        // Prefixes are registered purely for their side effect, so forEach is used instead of map.
        NS.prefixMapping.forEach { memobaseModel.setNsPrefix(it.key, it.value) }

        val language = memobaseModel.createRicoResource(RICO.Language)
            .addLiteral(RICO.name, "Deutsch")
            .addLiteral(RICO.type, "caption")

        // The record only has to exist inside the model; no local reference to it is needed.
        memobaseModel.createRicoResource(RICO.Record)
            .addProperty(RICO.hasLanguage, language)

        val n = NormalizeLanguages(
            "src/test/resources/facets/languages.csv",
            "src/test/resources/facets/language_labels.csv"
        )
        val transform = n.generate()
        val output = transform.transform(language, memobaseModel)

        // Close the stream deterministically, even if serialisation throws.
        FileOutputStream("src/test/resources/tmp/turtle-output-language-normalization.ttl").use { stream ->
            RDFDataMgr.write(stream, memobaseModel, RDFFormat.TURTLE_PRETTY)
        }

        assertAll(
            "",
            { assertThat(output).isEmpty() }
        )
    }
}
\ No newline at end of file
src/test/kotlin/TestLocalTransformLoader.kt
View file @
b06ff221
import
ch.memobase.configs.LocalTransformsLoader
import
java.io.File
import
java.io.FileInputStream
import
java.nio.charset.Charset
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.Test
import
org.junit.jupiter.api.TestInstance
import
org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS
import
java.io.File
import
java.io.FileInputStream
import
java.nio.charset.Charset
@TestInstance
(
PER_CLASS
)
...
...
@@ -17,7 +17,6 @@ class TestLocalTransformLoader {
local
.
get
()
val
result
=
local
.
getByteStream
()
assertThat
(
bytes
.
toString
(
Charset
.
defaultCharset
()))
.
isEqualTo
(
result
.
toString
(
Charset
.
defaultCharset
()))
.
isEqualTo
(
result
.
toString
(
Charset
.
defaultCharset
()))
}
}
\ No newline at end of file
src/test/kotlin/TestRicoResources.kt
0 → 100644
View file @
b06ff221
package
org.memobase
import
ch.memobase.rdf.MemobaseModel
import
org.apache.jena.rdf.model.Statement
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.Test
import
org.junit.jupiter.api.TestInstance
import
org.memobase.rdf.RICO
/**
 * Tests for removing literal statements from resources created through [MemobaseModel].
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestRicoResources {

    /**
     * Adds the names "TEST" and "KEEP" to a person, removes the "TEST" value and checks
     * that every remaining name statement carries the value "KEEP".
     */
    @Test
    fun `test remove literal statement`() {
        val model = MemobaseModel()
        val subject = model.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, "TEST")
            .addLiteral(RICO.name, "KEEP")

        subject.removeAllProperties(RICO.name, "TEST")

        assertThat(subject)
            .extracting { resource -> resource.listProperties(RICO.name) }
            .asList()
            .allMatch { statement -> (statement as Statement).string == "KEEP" }
    }

    /**
     * Adds the names "KEEP1" and "KEEP2" to a person, asks for removal of the value "TEST"
     * (which was never added) and checks that every name statement still carries one of
     * the two original values.
     */
    @Test
    fun `test remove no matching literal statement`() {
        val model = MemobaseModel()
        val subject = model.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, "KEEP1")
            .addLiteral(RICO.name, "KEEP2")

        subject.removeAllProperties(RICO.name, "TEST")

        assertThat(subject)
            .extracting { resource -> resource.listProperties(RICO.name) }
            .asList()
            .allMatch { statement ->
                val text = (statement as Statement).string
                text == "KEEP1" || text == "KEEP2"
            }
    }
}
src/test/kotlin/TestTransformers.kt
0 → 100644
View file @
b06ff221
/*
* normalization service
* Copyright (C) 2019 Memobase
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package
org.memobase
import
ch.memobase.rdf.MemobaseModel
import
ch.memobase.rdf.RicoResource
import
ch.memobase.transform.ExtractCreationRelationTransform
import
ch.memobase.transform.PersonNormalizer
import
ch.memobase.transform.SplitEntityTransform
import
org.apache.jena.rdf.model.Statement
import
org.apache.jena.sparql.vocabulary.FOAF
import
org.apache.logging.log4j.LogManager
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.Test
import
org.junit.jupiter.api.TestInstance
import
org.junit.jupiter.api.assertAll
import
org.junit.jupiter.params.ParameterizedTest
import
org.junit.jupiter.params.provider.MethodSource
import
org.memobase.params.PersonNormalizerParams
import
org.memobase.rdf.RDF
import
org.memobase.rdf.RICO
import
org.memobase.rdf.SKOS
import
params.EntitySplitterParams
import
java.io.File
import
java.nio.charset.Charset
import
java.util.stream.Stream
import
kotlin.test.assertEquals
/**
 * Tests for the transforms [ExtractCreationRelationTransform], [SplitEntityTransform]
 * and [PersonNormalizer].
 *
 * The provider functions referenced by [MethodSource] are instance members of this class,
 * which runs with the per-class test instance lifecycle.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestTransformers {
    private val log = LogManager.getLogger("TransformerTests")
    private val resourcePath = "src/test/resources/data"

    /** Reads the file [fileName] below [resourcePath] using the platform default charset. */
    private fun readFile(fileName: String): String =
        File("$resourcePath/$fileName").readText(Charset.defaultCharset())

    /**
     * Runs the creation relation transform on a relation whose target person has a name with
     * a parenthesised suffix and checks that the relation ends up with exactly one name
     * statement whose literal value is "photographie".
     */
    @Test
    fun `test creation relation name extractor`() {
        val transform = ExtractCreationRelationTransform(Regex("\\((?<relation>.*)\\)"), "fr")
        val model = MemobaseModel()
        val agent = model.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, "Adrien Porchet (photographie)")
        val relation = model.createRicoResource(RICO.CreationRelation, "contributor")
            .addLiteral(RICO.name, "DUMMY-VALUE")
            .addProperty(RICO.creationRelationHasTarget, agent)
        agent.addProperty(RICO.agentIsTargetOfCreationRelation, relation)

        // The returned value is not inspected by this test.
        transform.transform(relation, model)

        assertAll(
            "",
            {
                assertThat(relation)
                    .extracting { resource -> resource.listProperties(RICO.name) }
                    .asList()
                    .hasOnlyElementsOfType(Statement::class.java)
                    .allMatch { statement ->
                        (statement as Statement).`object`.asLiteral().value == "photographie"
                    }
                    .size().isEqualTo(1)
            }
        )
    }

    /**
     * Splits a single entity attached to a record and checks that two entities with the
     * expected literal values exist afterwards, that the record links to both of them and
     * that the transform returns an empty list.
     */
    @ParameterizedTest
    @MethodSource("entitySplitterParams")
    fun `test entity splitter simple`(params: EntitySplitterParams) {
        val splitter = SplitEntityTransform(params.type, params.splitterLiteral, params.delimiter)
        val model = MemobaseModel()
        val record = model.createRicoResource(RICO.Record)
        val source = model.createRicoResource(params.type)
            .addLiteral(params.splitterLiteral, params.value)
        record.addProperty(params.recordToEntity, source.resource)

        val output = splitter.transform(source, model)

        // Sort by literal value so the positional assertions below are deterministic.
        val entities = model.listSubjectsWithProperty(RDF.type, params.type)
            .mapWith { RicoResource(it) }
            .toList()
            .sortedBy { entity -> entity.getStringLiteral(params.splitterLiteral) }

        assertAll(
            "entity splitter tests",
            { assertEquals(2, entities.size) },
            { assertEquals(true, entities[0].hasProperty(params.splitterLiteral, params.splitValues[0])) },
            { assertEquals(true, entities[1].hasProperty(params.splitterLiteral, params.splitValues[1])) },
            { assertEquals(2, record.listProperties(params.recordToEntity).toList().size) },
            { assertEquals(emptyList(), output) }
        )
    }

    /** Parameter sets for the simple entity splitter test. */
    fun entitySplitterParams(): Stream<EntitySplitterParams> = Stream.of(
        EntitySplitterParams(
            type = RICO.Language,
            splitterLiteral = RICO.name,
            delimiter = ",",
            recordToEntity = RICO.hasLanguage,
            value = "de,fr",
            splitValues = listOf("de", "fr")
        ),
        EntitySplitterParams(
            type = SKOS.Concept,
            splitterLiteral = SKOS.prefLabel,
            delimiter = ";",
            recordToEntity = RICO.hasSubject,
            value = "POLITIK, INTERNATIONALE ORGANISATION; GESUNDHEIT, KRANKHEIT, EPIDEMIE",
            splitValues = listOf(
                "GESUNDHEIT, KRANKHEIT, EPIDEMIE",
                "POLITIK, INTERNATIONALE ORGANISATION"
            )
        )
    )

    /**
     * Splits a person that is the target of a creation relation and checks that two persons
     * with the expected names exist afterwards, that the record is the source of two creation
     * relations and that the transform returns an empty list.
     */
    @Test
    fun `test entity splitter with creation relation`() {
        val splitter = SplitEntityTransform(RICO.Person, RICO.name, ";")
        val model = MemobaseModel()
        val record = model.createRicoResource(RICO.Record)
        val source = model.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, "Test Person 1; Test Person 2")
        val relation = model.createRicoResource(RICO.CreationRelation, "creator")
            .addProperty(RICO.creationRelationHasTarget, source.resource)
            .addProperty(RICO.creationRelationHasSource, record.resource)
            .addLiteral(RICO.name, "Fotograf")
        source.addProperty(RICO.agentIsTargetOfCreationRelation, relation.resource)
        record.addProperty(RICO.recordResourceOrInstantiationIsSourceOfCreationRelation, relation.resource)

        val output = splitter.transform(source, model)

        // Sort by name so the positional assertions below are deterministic.
        val persons = model.listSubjectsWithProperty(RDF.type, RICO.Person)
            .toList()
            .sortedBy { entity -> entity.getProperty(RICO.name).string }

        assertAll(
            "entity splitter tests",
            { assertEquals(2, persons.size) },
            { assertEquals(true, persons[0].hasProperty(RICO.name, "Test Person 1")) },
            { assertEquals(true, persons[1].hasProperty(RICO.name, "Test Person 2")) },
            {
                assertEquals(
                    2,
                    record.listProperties(RICO.recordResourceOrInstantiationIsSourceOfCreationRelation)
                        .toList()
                        .size
                )
            },
            { assertEquals(emptyList(), output) }
        )
    }

    /**
     * Runs the splitter with a delimiter that does not occur in the label and checks that
     * the single concept, its label and the single record link are all still present.
     */
    @Test
    fun `test no entity split`() {
        val splitter = SplitEntityTransform(SKOS.Concept, SKOS.prefLabel, ";")
        val model = MemobaseModel()
        val record = model.createRicoResource(RICO.Record)
        val source = model.createRicoResource(SKOS.Concept)
            .addLiteral(SKOS.prefLabel, "subject1, subject2")
        record.addProperty(RICO.hasSubject, source.resource)

        val output = splitter.transform(source, model)

        val concepts = model.listSubjectsWithProperty(RDF.type, SKOS.Concept)
            .mapWith { RicoResource(it) }
            .toList()
            .sortedBy { entity -> entity.getStringLiteral(SKOS.prefLabel) }

        assertAll(
            "entity splitter tests",
            { assertEquals(1, concepts.size) },
            { assertEquals(true, concepts[0].hasProperty(SKOS.prefLabel, "subject1, subject2")) },
            { assertEquals(1, record.listProperties(RICO.hasSubject).toList().size) },
            { assertEquals(emptyList(), output) }
        )
    }

    /**
     * Splits a concept label on a whitespace delimiter and checks that two concepts with the
     * expected labels exist afterwards and that the record links to both of them.
     */
    @Test
    fun `test double space entity split`() {
        // NOTE(review): the test name mentions a double space, but the delimiter and the label
        // below contain a single space as they appear in this view - confirm against the repository.
        val splitter = SplitEntityTransform(SKOS.Concept, SKOS.prefLabel, " ")
        val model = MemobaseModel()
        val record = model.createRicoResource(RICO.Record)
        val source = model.createRicoResource(SKOS.Concept)
            .addLiteral(SKOS.prefLabel, "subject1 subject2")
        record.addProperty(RICO.hasSubject, source.resource)

        val output = splitter.transform(source, model)

        val concepts = model.listSubjectsWithProperty(RDF.type, SKOS.Concept)
            .mapWith { RicoResource(it) }
            .toList()
            .sortedBy { entity -> entity.getStringLiteral(SKOS.prefLabel) }

        assertAll(
            "entity splitter tests",
            { assertEquals(2, concepts.size) },
            { assertEquals(true, concepts[0].hasProperty(SKOS.prefLabel, "subject1")) },
            { assertEquals(true, concepts[1].hasProperty(SKOS.prefLabel, "subject2")) },
            { assertEquals(2, record.listProperties(RICO.hasSubject).toList().size) },
            { assertEquals(emptyList(), output) }
        )
    }

    /**
     * Normalizes a person name and checks whether the expected first and last name
     * properties are present and that the transform returns an empty list.
     */
    @ParameterizedTest
    @MethodSource("personNormalizerParams")
    fun `test person normalizer`(params: PersonNormalizerParams) {
        val normalizer = PersonNormalizer(
            params.nameOrder,
            params.singleNameIsLastName,
            params.nameDelimiter
        )
        val model = MemobaseModel()
        val agent = model.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, params.name)

        val output = normalizer.transform(agent, model)

        assertAll(
            "person normalizer tests",
            { assertEquals(params.hasFirstName, agent.hasProperty(FOAF.firstName, params.firstName)) },
            { assertEquals(params.hasLastName, agent.hasProperty(FOAF.lastName, params.lastName)) },
            { assertEquals(emptyList(), output) }
        )
    }

    /** Parameter sets for the person normalizer test. */
    fun personNormalizerParams(): Stream<PersonNormalizerParams> = Stream.of(
        PersonNormalizerParams(
            nameOrder = "last-to-first",
            singleNameIsLastName = true,
            nameDelimiter = ",",
            name = "Vogel, Peter",
            firstName = "Peter",
            hasFirstName = true,
            lastName = "Vogel",
            hasLastName = true
        ),
        PersonNormalizerParams(
            nameOrder = "first-to-last",
            singleNameIsLastName = true,
            nameDelimiter = " ",
            name = "Peter Vogel",
            firstName = "Peter",
            hasFirstName = true,
            lastName = "Vogel",
            hasLastName = true
        ),
        PersonNormalizerParams(
            nameOrder = "first-to-last",
            singleNameIsLastName = true,
            nameDelimiter = " ",
            name = "Peter Hans Vogel",
            firstName = "Peter Hans",
            hasFirstName = true,
            lastName = "Vogel",
            hasLastName = true
        ),
        PersonNormalizerParams(
            nameOrder = "first-to-last",
            singleNameIsLastName = true,
            nameDelimiter = " ",
            name = "Vogel",
            firstName = "",
            hasFirstName = false,
            lastName = "Vogel",
            hasLastName = true
        ),
        PersonNormalizerParams(
            nameOrder = "first-to-last",
            singleNameIsLastName = false,
            nameDelimiter = " ",
            name = "Peter",
            firstName = "Peter",
            hasFirstName = true,
            lastName = "",
            hasLastName = false
        )
    )
}
src/test/kotlin/params/EntitySplitterParams.kt
0 → 100644
View file @
b06ff221
package
params
import
org.apache.jena.rdf.model.Property
import
org.apache.jena.rdf.model.Resource
/**
 * One parameter set for the parameterized entity splitter test.
 *
 * @property type RDF type of the entity that is created and split.
 * @property splitterLiteral property holding the literal that is split.
 * @property delimiter delimiter handed to the split transform.
 * @property recordToEntity property used to link the record to the entity.
 * @property value literal value assigned to the entity before the transform runs.
 * @property splitValues expected literal values after the split, in the order in which
 * the test compares them.
 */
data class EntitySplitterParams(
    val type: Resource,
    val splitterLiteral: Property,
    val delimiter: String,
    val recordToEntity: Property,
    val value: String,
    val splitValues: List<String>
)
src/test/kotlin/params/PersonNormalizerParams.kt
0 → 100644
View file @
b06ff221
package
org.memobase.params
/**
 * One parameter set for the parameterized person normalizer test.
 *
 * @property nameOrder name order setting handed to the normalizer.
 * @property singleNameIsLastName flag handed to the normalizer alongside [nameOrder].
 * @property nameDelimiter delimiter handed to the normalizer.
 * @property name raw name literal assigned to the person before the transform runs.
 * @property firstName first name value the test looks up on the person.
 * @property hasFirstName whether the person is expected to have [firstName].
 * @property lastName last name value the test looks up on the person.
 * @property hasLastName whether the person is expected to have [lastName].
 */
data class PersonNormalizerParams(
    val nameOrder: String,
    val singleNameIsLastName: Boolean,
    val nameDelimiter: String,
    val name: String,
    val firstName: String,
    val hasFirstName: Boolean,
    val lastName: String,
    val hasLastName: Boolean
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment