Commit b06ff221 authored by Jonas Waeber
Browse files

Add unit tests from normalizer

parent 85573306
......@@ -87,7 +87,7 @@ bintray {
name = 'normalizer-service-configuration'
repo = 'memobase'
userOrg = 'memoriav'
desc = 'Parser & validation for the mapper service configuration.'
desc = 'Parser & validation for the normalizer service configuration.'
licenses = ['Apache-2.0']
vcsUrl = 'https://gitlab.switch.ch/memoriav/memobase-2020/libraries/normalizer-service-configuration'
}
......
package org.memobase
import ch.memobase.helpers.Date
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
/**
 * Unit tests for the month handling of [Date].
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestDates {
    /**
     * Feeds [Date.validateMonthValue] a German month name, an abbreviated French month name,
     * a single-digit number and `null`, and checks the two-digit month string (or `null`)
     * that comes back for each.
     */
    @Test
    fun `test month symbol map`() {
        val december = Date.validateMonthValue("Dezember")
        val february = Date.validateMonthValue("févr.")
        // "5" is the fifth month, i.e. May.
        val may = Date.validateMonthValue("5")
        val missing = Date.validateMonthValue(null)

        assertThat(december).isEqualTo("12")
        assertThat(february).isEqualTo("02")
        assertThat(may).isEqualTo("05")
        assertThat(missing).isNull()
    }
}
import ch.memobase.configs.GlobalTransformsLoader
import ch.memobase.configs.LocalTransformsLoader
import java.io.File
import java.io.FileInputStream
import java.nio.charset.Charset
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
......
package org.memobase
import ch.memobase.model.NormalizeLanguages
import ch.memobase.rdf.MemobaseModel
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.RDFFormat
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll
import org.memobase.rdf.NS
import org.memobase.rdf.RICO
import java.io.FileOutputStream
/**
 * Tests for the language transform generated by [NormalizeLanguages].
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
internal class TestLanguageNormalizer {
    /*
        TODO: Find a way to properly test / validate rdf structures. (Use SHACL shapes)
     */

    /**
     * Builds a model with one record that has a single language ("Deutsch", type "caption"),
     * runs the generated transform on the language resource, writes the resulting model as
     * Turtle to the test resources folder and asserts that the transform returned nothing.
     */
    @Test
    fun `test language normalizer`() {
        val memobaseModel = MemobaseModel()
        // The prefixes are registered purely for their side effect, so iterate instead of mapping.
        NS.prefixMapping.forEach {
            memobaseModel.setNsPrefix(it.key, it.value)
        }
        val language =
            memobaseModel.createRicoResource(RICO.Language)
                .addLiteral(RICO.name, "Deutsch")
                .addLiteral(RICO.type, "caption")
        // The record is only needed inside the model; no local reference is kept.
        memobaseModel.createRicoResource(RICO.Record)
            .addProperty(RICO.hasLanguage, language)

        val n = NormalizeLanguages(
            "src/test/resources/facets/languages.csv",
            "src/test/resources/facets/language_labels.csv"
        )
        val transform = n.generate()
        val output = transform.transform(language, memobaseModel)

        // `use` closes the stream even if serialisation throws.
        FileOutputStream("src/test/resources/tmp/turtle-output-language-normalization.ttl").use { stream ->
            RDFDataMgr.write(stream, memobaseModel, RDFFormat.TURTLE_PRETTY)
        }

        assertThat(output).isEmpty()
    }
}
\ No newline at end of file
import ch.memobase.configs.LocalTransformsLoader
import java.io.File
import java.io.FileInputStream
import java.nio.charset.Charset
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS
import java.io.File
import java.io.FileInputStream
import java.nio.charset.Charset
@TestInstance(PER_CLASS)
......@@ -18,6 +18,5 @@ class TestLocalTransformLoader {
val result = local.getByteStream()
assertThat(bytes.toString(Charset.defaultCharset()))
.isEqualTo(result.toString(Charset.defaultCharset()))
}
}
\ No newline at end of file
package org.memobase
import ch.memobase.rdf.MemobaseModel
import org.apache.jena.rdf.model.Statement
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.memobase.rdf.RICO
/**
 * Tests for removing literal statements from a RiC-O resource created through [MemobaseModel].
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestRicoResources {
    /**
     * Adds two names, removes the one equal to "TEST" and expects exactly the "KEEP" name to
     * remain.
     */
    @Test
    fun `test remove literal statement`() {
        val memobaseModel = MemobaseModel()
        val person = memobaseModel.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, "TEST")
            .addLiteral(RICO.name, "KEEP")
        person.removeAllProperties(RICO.name, "TEST")
        // The size check is required: allMatch alone also passes on an empty list,
        // which would hide a removal that deletes every name.
        assertThat(person)
            .extracting { it.listProperties(RICO.name) }
            .asList()
            .hasSize(1)
            .allMatch {
                it as Statement
                it.string == "KEEP"
            }
    }

    /**
     * Adds two names, asks for removal of a value that is not present and expects both names
     * to remain.
     */
    @Test
    fun `test remove no matching literal statement`() {
        val memobaseModel = MemobaseModel()
        val person = memobaseModel.createRicoResource(RICO.Person)
            .addLiteral(RICO.name, "KEEP1")
            .addLiteral(RICO.name, "KEEP2")
        person.removeAllProperties(RICO.name, "TEST")
        // Both statements must survive; allMatch alone would not notice a missing one.
        assertThat(person)
            .extracting { it.listProperties(RICO.name) }
            .asList()
            .hasSize(2)
            .allMatch {
                it as Statement
                it.string == "KEEP1" || it.string == "KEEP2"
            }
    }
}
/*
* normalization service
* Copyright (C) 2019 Memobase
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import ch.memobase.rdf.MemobaseModel
import ch.memobase.rdf.RicoResource
import ch.memobase.transform.ExtractCreationRelationTransform
import ch.memobase.transform.PersonNormalizer
import ch.memobase.transform.SplitEntityTransform
import org.apache.jena.rdf.model.Statement
import org.apache.jena.sparql.vocabulary.FOAF
import org.apache.logging.log4j.LogManager
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import org.memobase.params.PersonNormalizerParams
import org.memobase.rdf.RDF
import org.memobase.rdf.RICO
import org.memobase.rdf.SKOS
import params.EntitySplitterParams
import java.io.File
import java.nio.charset.Charset
import java.util.stream.Stream
import kotlin.test.assertEquals
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestTransformers {
private val log = LogManager.getLogger("TransformerTests")
private val resourcePath = "src/test/resources/data"
/**
 * Returns the text of [fileName] located under [resourcePath], decoded with the platform
 * default charset.
 */
private fun readFile(fileName: String): String =
    File("$resourcePath/$fileName").readText(Charset.defaultCharset())
/**
 * Runs [ExtractCreationRelationTransform] on a creation relation whose target person is named
 * "Adrien Porchet (photographie)" and expects the relation to end up with exactly one name
 * statement whose literal value is "photographie".
 */
@Test
fun `test creation relation name extractor`() {
    val extractor = ExtractCreationRelationTransform(Regex("\\((?<relation>.*)\\)"), "fr")
    val model = MemobaseModel()

    val agent = model.createRicoResource(RICO.Person)
        .addLiteral(RICO.name, "Adrien Porchet (photographie)")
    val relation = model.createRicoResource(RICO.CreationRelation, "contributor")
        .addLiteral(RICO.name, "DUMMY-VALUE")
        .addProperty(RICO.creationRelationHasTarget, agent)
    agent.addProperty(RICO.agentIsTargetOfCreationRelation, relation)

    // The return value of the transform is not inspected by this test.
    extractor.transform(relation, model)

    assertAll(
        "",
        {
            val relationNames = assertThat(relation)
                .extracting { it.listProperties(RICO.name) }
                .asList()
            relationNames
                .hasOnlyElementsOfType(Statement::class.java)
                .allMatch { candidate ->
                    candidate as Statement
                    candidate.`object`.asLiteral().value == "photographie"
                }
                .size().isEqualTo(1)
        }
    )
}
/**
 * Parameterised check of [SplitEntityTransform]: a single entity whose literal holds two
 * delimited values must end up as two entities of the same type, both linked from the record,
 * and the transform must return an empty list.
 */
@ParameterizedTest
@MethodSource("entitySplitterParams")
fun `test entity splitter simple`(params: EntitySplitterParams) {
    val splitter = SplitEntityTransform(params.type, params.splitterLiteral, params.delimiter)
    val model = MemobaseModel()
    val record = model.createRicoResource(RICO.Record)
    val combined = model.createRicoResource(params.type)
        .addLiteral(params.splitterLiteral, params.value)
    record.addProperty(params.recordToEntity, combined.resource)

    val returned = splitter.transform(combined, model)

    val entities = model
        .listSubjectsWithProperty(RDF.type, params.type)
        .mapWith { RicoResource(it) }
        .toList()
    // Sort by literal so the index-based assertions below are deterministic.
    entities.sortBy { it.getStringLiteral(params.splitterLiteral) }

    assertAll(
        "entity splitter tests",
        { assertEquals(2, entities.size) },
        { assertEquals(true, entities[0].hasProperty(params.splitterLiteral, params.splitValues[0])) },
        { assertEquals(true, entities[1].hasProperty(params.splitterLiteral, params.splitValues[1])) },
        { assertEquals(2, record.listProperties(params.recordToEntity).toList().size) },
        { assertEquals(emptyList(), returned) }
    )
}
/**
 * Argument source for `test entity splitter simple`.
 *
 * The expected `splitValues` are listed in ascending order because the test sorts the split
 * entities by their literal before comparing by index.
 */
fun entitySplitterParams(): Stream<EntitySplitterParams> {
    val commaSeparatedLanguages = EntitySplitterParams(
        type = RICO.Language,
        splitterLiteral = RICO.name,
        delimiter = ",",
        recordToEntity = RICO.hasLanguage,
        value = "de,fr",
        splitValues = listOf("de", "fr")
    )
    val semicolonSeparatedSubjects = EntitySplitterParams(
        type = SKOS.Concept,
        splitterLiteral = SKOS.prefLabel,
        delimiter = ";",
        recordToEntity = RICO.hasSubject,
        value = "POLITIK, INTERNATIONALE ORGANISATION; GESUNDHEIT, KRANKHEIT, EPIDEMIE",
        splitValues = listOf("GESUNDHEIT, KRANKHEIT, EPIDEMIE", "POLITIK, INTERNATIONALE ORGANISATION")
    )
    return Stream.of(commaSeparatedLanguages, semicolonSeparatedSubjects)
}
/**
 * Splits a person named "Test Person 1; Test Person 2" that is attached to a record through a
 * creation relation. Expects two persons afterwards, two creation-relation links on the
 * record, and an empty list returned by the transform.
 */
@Test
fun `test entity splitter with creation relation`() {
    val splitter = SplitEntityTransform(RICO.Person, RICO.name, ";")
    val model = MemobaseModel()
    val record = model.createRicoResource(RICO.Record)
    val combinedPerson = model.createRicoResource(RICO.Person)
        .addLiteral(RICO.name, "Test Person 1; Test Person 2")
    val relation = model.createRicoResource(RICO.CreationRelation, "creator")
        .addProperty(RICO.creationRelationHasTarget, combinedPerson.resource)
        .addProperty(RICO.creationRelationHasSource, record.resource)
        .addLiteral(RICO.name, "Fotograf")
    // Wire up the inverse links from the person and the record to the relation.
    combinedPerson.addProperty(RICO.agentIsTargetOfCreationRelation, relation.resource)
    record.addProperty(RICO.recordResourceOrInstantiationIsSourceOfCreationRelation, relation.resource)

    val returned = splitter.transform(combinedPerson, model)

    val persons = model.listSubjectsWithProperty(RDF.type, RICO.Person).toList()
    // Sort by name so the index-based assertions below are deterministic.
    persons.sortBy { it.getProperty(RICO.name).string }

    assertAll(
        "entity splitter tests",
        { assertEquals(2, persons.size) },
        { assertEquals(true, persons[0].hasProperty(RICO.name, "Test Person 1")) },
        { assertEquals(true, persons[1].hasProperty(RICO.name, "Test Person 2")) },
        {
            val relationLinks =
                record.listProperties(RICO.recordResourceOrInstantiationIsSourceOfCreationRelation).toList()
            assertEquals(2, relationLinks.size)
        },
        { assertEquals(emptyList(), returned) }
    )
}
/**
 * Uses ";" as delimiter on a label that contains only a comma and expects the concept to stay
 * untouched: one concept, one subject link on the record, empty list returned.
 */
@Test
fun `test no entity split`() {
    val splitter = SplitEntityTransform(SKOS.Concept, SKOS.prefLabel, ";")
    val model = MemobaseModel()
    val record = model.createRicoResource(RICO.Record)
    val concept = model.createRicoResource(SKOS.Concept)
        .addLiteral(SKOS.prefLabel, "subject1, subject2")
    record.addProperty(RICO.hasSubject, concept.resource)

    val returned = splitter.transform(concept, model)

    val concepts = model
        .listSubjectsWithProperty(RDF.type, SKOS.Concept)
        .mapWith { RicoResource(it) }
        .toList()
    concepts.sortBy { it.getStringLiteral(SKOS.prefLabel) }

    assertAll(
        "entity splitter tests",
        { assertEquals(1, concepts.size) },
        { assertEquals(true, concepts[0].hasProperty(SKOS.prefLabel, "subject1, subject2")) },
        { assertEquals(1, record.listProperties(RICO.hasSubject).toList().size) },
        { assertEquals(emptyList(), returned) }
    )
}
/**
 * Splits a concept label on a whitespace delimiter and expects two concepts ("subject1" and
 * "subject2"), two subject links on the record and an empty list returned by the transform.
 *
 * NOTE(review): the test name says "double space", but the delimiter and the label below each
 * contain a single space - confirm the intended literals.
 */
@Test
fun `test double space entity split`() {
    val splitter = SplitEntityTransform(SKOS.Concept, SKOS.prefLabel, " ")
    val model = MemobaseModel()
    val record = model.createRicoResource(RICO.Record)
    val concept = model.createRicoResource(SKOS.Concept)
        .addLiteral(SKOS.prefLabel, "subject1 subject2")
    record.addProperty(RICO.hasSubject, concept.resource)

    val returned = splitter.transform(concept, model)

    val concepts = model
        .listSubjectsWithProperty(RDF.type, SKOS.Concept)
        .mapWith { RicoResource(it) }
        .toList()
    // Sort by label so the index-based assertions below are deterministic.
    concepts.sortBy { it.getStringLiteral(SKOS.prefLabel) }

    assertAll(
        "entity splitter tests",
        { assertEquals(2, concepts.size) },
        { assertEquals(true, concepts[0].hasProperty(SKOS.prefLabel, "subject1")) },
        { assertEquals(true, concepts[1].hasProperty(SKOS.prefLabel, "subject2")) },
        { assertEquals(2, record.listProperties(RICO.hasSubject).toList().size) },
        { assertEquals(emptyList(), returned) }
    )
}
/**
 * Parameterised check of [PersonNormalizer]: after the transform ran on a person carrying
 * `params.name`, the presence of the expected FOAF first and last name must match the
 * `hasFirstName` / `hasLastName` flags, and the transform must return an empty list.
 */
@ParameterizedTest
@MethodSource("personNormalizerParams")
fun `test person normalizer`(params: PersonNormalizerParams) {
    val personNormalizer = PersonNormalizer(
        params.nameOrder,
        params.singleNameIsLastName,
        params.nameDelimiter
    )
    val model = MemobaseModel()
    val subject = model.createRicoResource(RICO.Person)
        .addLiteral(RICO.name, params.name)

    val returned = personNormalizer.transform(subject, model)

    assertAll(
        "person normalizer tests",
        { assertEquals(params.hasFirstName, subject.hasProperty(FOAF.firstName, params.firstName)) },
        { assertEquals(params.hasLastName, subject.hasProperty(FOAF.lastName, params.lastName)) },
        { assertEquals(emptyList(), returned) }
    )
}
/**
 * Argument source for `test person normalizer`.
 *
 * Covers a comma-separated "last, first" name, space-separated "first last" names with one and
 * two first names, and single-word names with `singleNameIsLastName` set to `true` and `false`.
 */
fun personNormalizerParams(): Stream<PersonNormalizerParams> {
    val lastThenFirst = PersonNormalizerParams(
        nameOrder = "last-to-first",
        singleNameIsLastName = true,
        nameDelimiter = ",",
        name = "Vogel, Peter",
        firstName = "Peter",
        hasFirstName = true,
        lastName = "Vogel",
        hasLastName = true
    )
    val firstThenLast = PersonNormalizerParams(
        nameOrder = "first-to-last",
        singleNameIsLastName = true,
        nameDelimiter = " ",
        name = "Peter Vogel",
        firstName = "Peter",
        hasFirstName = true,
        lastName = "Vogel",
        hasLastName = true
    )
    val twoFirstNames = PersonNormalizerParams(
        nameOrder = "first-to-last",
        singleNameIsLastName = true,
        nameDelimiter = " ",
        name = "Peter Hans Vogel",
        firstName = "Peter Hans",
        hasFirstName = true,
        lastName = "Vogel",
        hasLastName = true
    )
    val singleNameAsLastName = PersonNormalizerParams(
        nameOrder = "first-to-last",
        singleNameIsLastName = true,
        nameDelimiter = " ",
        name = "Vogel",
        firstName = "",
        hasFirstName = false,
        lastName = "Vogel",
        hasLastName = true
    )
    val singleNameAsFirstName = PersonNormalizerParams(
        nameOrder = "first-to-last",
        singleNameIsLastName = false,
        nameDelimiter = " ",
        name = "Peter",
        firstName = "Peter",
        hasFirstName = true,
        lastName = "",
        hasLastName = false
    )
    return Stream.of(
        lastThenFirst,
        firstThenLast,
        twoFirstNames,
        singleNameAsLastName,
        singleNameAsFirstName
    )
}
}
package params
import org.apache.jena.rdf.model.Property
import org.apache.jena.rdf.model.Resource
/**
 * One parameter set for an entity splitter test case.
 *
 * @property type RDF type of the entity that is created and split.
 * @property splitterLiteral property holding the delimited literal on that entity.
 * @property delimiter separator handed to the splitter.
 * @property recordToEntity property that links the record to the entity.
 * @property value the delimited literal placed on the entity before splitting.
 * @property splitValues the literals expected on the resulting entities.
 */
data class EntitySplitterParams(
    val type: Resource,
    val splitterLiteral: Property,
    val delimiter: String,
    val recordToEntity: Property,
    val value: String,
    val splitValues: List<String>
)
package org.memobase.params
/**
 * One parameter set for a person normalizer test case.
 *
 * @property nameOrder order setting handed to the normalizer (e.g. "first-to-last").
 * @property singleNameIsLastName flag handed to the normalizer for single-word names.
 * @property nameDelimiter separator handed to the normalizer.
 * @property name the raw name placed on the person before normalisation.
 * @property firstName first name value to look for after normalisation.
 * @property hasFirstName whether [firstName] is expected to be present.
 * @property lastName last name value to look for after normalisation.
 * @property hasLastName whether [lastName] is expected to be present.
 */
data class PersonNormalizerParams(
    val nameOrder: String,
    val singleNameIsLastName: Boolean,
    val nameDelimiter: String,
    val name: String,
    val firstName: String,
    val hasFirstName: Boolean,
    val lastName: String,
    val hasLastName: Boolean
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment