Commit ca893d37 authored by Jonas Waeber

Add person and place facet builders

parent ba74b75d
......@@ -19,7 +19,7 @@
package org.memobase
object KEYS {
const val entityId = "@id"
const val atType = "@type"
const val ricoType = "type"
const val firstName = "firstName"
......@@ -29,6 +29,8 @@ object KEYS {
const val contributor = "contributor"
const val creator = "creator"
const val Person = "Person"
const val Instantiation = "Instantiation"
const val Place = "Place"
}
......@@ -20,6 +20,10 @@ package org.memobase
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonContainerBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.helpers.CarrierType
import org.memobase.helpers.Extract
import org.memobase.helpers.Filter
......@@ -32,9 +36,37 @@ import org.memobase.rdf.NS
class SearchDocTransform {
private val log = LogManager.getLogger("SearchDocTransform")
fun transform(input: Map<String, JsonObject>): SearchDoc {
val record = input["record"] ?: error("No record defined in this message.")
val digitalObject = input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
val physicalObject = input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }
val digitalObject =
input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
val physicalObject =
input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }
val personFacetBuilder = PersonFacetBuilder()
val subjectPersonBuilder = PersonContainerBuilder("hasSubject", record, null, input)
val publisherPersonBuilder = PersonContainerBuilder("publishedBy", record, null, input)
val producersPersonBuilder = PersonContainerBuilder("P60441", record, null, input)
val contributorPersonBuilder = PersonContainerBuilder("", record, KEYS.contributor, input)
val creatorPersonBuilder = PersonContainerBuilder("", record, KEYS.creator, input)
val placeFacetBuilder = PlaceFacetBuilder()
for (item in input.values) {
for (builder: IFieldBuilder in listOf(
personFacetBuilder,
subjectPersonBuilder,
publisherPersonBuilder,
producersPersonBuilder,
contributorPersonBuilder,
creatorPersonBuilder,
placeFacetBuilder
)) {
if (builder.filter(item)) {
builder.append(item)
}
}
}
val id = record["@id"] as String
......@@ -104,7 +136,7 @@ class SearchDocTransform {
dateIssued = Extract.extractDate(datesIssued),
placeCapture = Extract.extractPlaces(placeCaptured),
placeRelated = Extract.extractPlaces(placesRelated),
place = Extract.facetEntity(placeCaptured + placesRelated, "name"),
placeFacet = placeFacetBuilder.build(),
rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
memoriavClaim = record["P60451"] != null,
languageCaption = Extract.typedEntityByType(recordLanguages, "type", "caption", "name"),
......@@ -112,55 +144,45 @@ class SearchDocTransform {
language = Extract.facetEntity(recordLanguages, "name"),
genre = Extract.facetEntity(genre, "prefLabel"),
keywords = Extract.facetEntity(subjects, "prefLabel"),
agentSubject = Extract.typedEntityByType(subjects, "@type", "Agent", "name"),
personSubject = Extract.typedEntityByType(subjects, "@type", "Person", "name"),
personSubject = subjectPersonBuilder.build(),
personProducer = producersPersonBuilder.build(),
personPublisher = publisherPersonBuilder.build(),
personContributor = contributorPersonBuilder.build(),
personCreator = creatorPersonBuilder.build(),
personsFacet = personFacetBuilder.build(),
corporateBodySubject = Extract.typedEntityByType(subjects, "@type", "CorporateBody", "name"),
agentProducer = Extract.typedEntityByType(producers, "@type", "Agent", "name"),
personProducer = Extract.typedEntityByType(producers, "@type", "Person", "name"),
corporateBodyProducer = Extract.typedEntityByType(producers, "@type", "CorporateBody", "name"),
agentPublisher = Extract.typedEntityByType(publishers, "@type", "Agent", "name"),
personPublisher = Extract.typedEntityByType(publishers, "@type", "Person", "name"),
corporateBodyPublisher = Extract.typedEntityByType(publishers, "@type", "CorporateBody", "name"),
agentContributor = Extract.extractTypedCreationRelationAgent(
creationRelationAgents,
input,
creationRelationTypeParam = "contributor",
agentTypeParam = NS.rico + "Agent"
),
personContributor = Extract.extractTypedCreationRelationAgent(
creationRelationAgents,
input,
creationRelationTypeParam = "contributor",
agentTypeParam = NS.rico + "Person"
),
corporateBodyContributor = Extract.extractTypedCreationRelationAgent(
creationRelationAgents,
input,
creationRelationTypeParam = "contributor",
agentTypeParam = NS.rico + "CorporateBody"
),
agentCreator = Extract.extractTypedCreationRelationAgent(
corporateBodyCreator = Extract.extractTypedCreationRelationAgent(
creationRelationAgents,
input,
creationRelationTypeParam = "creator",
agentTypeParam = NS.rico + "Agent"
agentTypeParam = NS.rico + "CorporateBody"
),
personCreator = Extract.extractTypedCreationRelationAgent(
agentSubject = Extract.typedEntityByType(subjects, "@type", "Agent", "name"),
agentProducer = Extract.typedEntityByType(producers, "@type", "Agent", "name"),
agentPublisher = Extract.typedEntityByType(publishers, "@type", "Agent", "name"),
agentContributor = Extract.extractTypedCreationRelationAgent(
creationRelationAgents,
input,
creationRelationTypeParam = "creator",
agentTypeParam = NS.rico + "Person"
creationRelationTypeParam = "contributor",
agentTypeParam = NS.rico + "Agent"
),
corporateBodyCreator = Extract.extractTypedCreationRelationAgent(
agentCreator = Extract.extractTypedCreationRelationAgent(
creationRelationAgents,
input,
creationRelationTypeParam = "creator",
agentTypeParam = NS.rico + "CorporateBody"
agentTypeParam = NS.rico + "Agent"
),
// TODO: Implement collected agents!
persons = emptyList(),
corporateBodies = emptyList(),
agents = emptyList(),
// DIGITAL & PHYSICAL
access = accessPhysical + accessDigital,
......@@ -171,7 +193,10 @@ class SearchDocTransform {
colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
locator = Extract.listOfStrings(digitalObject?.get("locator")),
usageConditionsDigital = Extract.languageContainer("conditionsOfUse", digitalObject?.get("conditionsOfUse")),
usageConditionsDigital = Extract.languageContainer(
"conditionsOfUse",
digitalObject?.get("conditionsOfUse")
),
usageDigital = usageDigital,
usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },
......@@ -179,11 +204,18 @@ class SearchDocTransform {
accessPhysical = accessPhysical,
durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
physicalCharacteristics = Extract.languageContainer("physicalCharacteristics", physicalObject?.get("physicalCharacteristics")),
physicalCharacteristics = Extract.languageContainer(
"physicalCharacteristics",
physicalObject?.get("physicalCharacteristics")
),
physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
usageConditionsPhysical = Extract.languageContainer("conditionsOfUse", physicalObject?.get("conditionsOfUse")),
usageConditionsPhysical = Extract.languageContainer(
"conditionsOfUse",
physicalObject?.get("conditionsOfUse")
),
usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier").flatMap { it.toList() },
callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
.flatMap { it.toList() },
format = format
)
}
......
......@@ -22,7 +22,7 @@ import com.beust.klaxon.JsonObject
/**
 * Contract for builders that collect values for one search document field.
 *
 * Usage as seen in SearchDocTransform: every input item is offered to `filter`;
 * when it returns true the item is handed to `append`; `build` is read once all
 * items have been processed.
 *
 * NOTE(review): this span lists two `filter` and two `build` signatures. It looks
 * like the previous and the new signature shown side by side (diff context) -
 * confirm which pair is current.
 */
interface IFieldBuilder {
// Decides whether the builder wants jsonObject; this variant also receives a map of JsonObjects keyed by String.
fun filter(jsonObject: JsonObject, map: Map<String, JsonObject>): Boolean
// Decides whether the builder wants jsonObject, based on the object alone.
fun filter(jsonObject: JsonObject): Boolean
// Consumes an accepted item and returns a message string describing the outcome.
fun append(jsonObject: JsonObject): String
// Returns the collected values as strings.
fun build(): List<String>
// Returns the collected values; the element type is left to the implementation.
fun build(): List<Any>
}
/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.builders
import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import org.memobase.KEYS
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.model.PersonContainer
import org.memobase.rdf.NS
/**
 * Collects [PersonContainer] entries for the persons attached to one source resource.
 *
 * A person is selected in one of two ways:
 * - when [creationRelationType] is set, the person must point (via
 *   `KEYS.agentIsTargetOfCreationRelation`) to a creation relation in [inputMap]
 *   whose `KEYS.ricoType` equals [creationRelationType];
 * - otherwise the value of [sourceProperty] on [sourceResource] must reference the
 *   person's `@id`, either directly as a string, as an object carrying `@id`, or
 *   as an array of those.
 *
 * @param sourceProperty property on [sourceResource] that references persons
 *        (only used when [creationRelationType] is null)
 * @param sourceResource resource whose persons are collected
 * @param creationRelationType creation relation type to select by, or null to
 *        select by [sourceProperty]
 * @param inputMap all resources of the message, keyed by their id
 */
class PersonContainerBuilder(
    private val sourceProperty: String,
    private val sourceResource: JsonObject,
    private val creationRelationType: String?,
    private val inputMap: Map<String, JsonObject>
) : IFieldBuilder {
    private val containers = mutableListOf<PersonContainer>()

    /**
     * Returns true when [jsonObject] is a person that belongs to this builder's
     * relation. Malformed items (missing or non-string ids) are rejected instead
     * of raising a cast error.
     */
    override fun filter(jsonObject: JsonObject): Boolean {
        // The candidate item must be the person. Checking the source resource's type
        // here (as before) rejects every item whenever the source resource is not
        // itself a person.
        if (jsonObject[KEYS.atType] != NS.rico + KEYS.Person) return false

        if (creationRelationType != null) {
            val relationId = jsonObject[KEYS.agentIsTargetOfCreationRelation] as? String ?: return false
            return inputMap[relationId]?.get(KEYS.ricoType) == creationRelationType
        }

        val personId = jsonObject[KEYS.entityId] as? String ?: return false
        return references(sourceResource[sourceProperty], personId)
    }

    /**
     * Creates a [PersonContainer] for [jsonObject] and stores it.
     *
     * @return a message describing what happened; when the person has no name
     *         nothing is stored and the message explains why.
     */
    override fun append(jsonObject: JsonObject): String {
        val names = Extract.languageContainer("person-name", jsonObject[KEYS.name])
        // reduce() throws on an empty list, so a person without a name is skipped explicitly.
        if (names.isEmpty()) {
            return "Failed to create person container for person ${jsonObject[KEYS.entityId]}, " +
                "because the person does not have a name."
        }
        val name = names.reduce { acc, languageContainer -> acc.merge(languageContainer) }

        val relationId = jsonObject[KEYS.agentIsTargetOfCreationRelation] as? String
        val relationNames = relationId
            ?.let { inputMap[it] }
            ?.let { Extract.languageContainer("creation-relation-name", it["name"]) }
            .orEmpty()
        // No creation relation (or one without a name) leaves the relation empty.
        val relation = if (relationNames.isEmpty()) {
            null
        } else {
            relationNames.reduce { acc, languageContainer -> acc.merge(languageContainer) }
        }

        containers.add(PersonContainer(name, relation, FacetBuildHelpers.person(jsonObject)))
        return "Created person container for person ${jsonObject[KEYS.entityId]}"
    }

    /** Returns a snapshot of the containers collected so far. */
    override fun build(): List<PersonContainer> {
        return containers.toList()
    }

    /** Checks whether a property value references the person with id [personId]. */
    private fun references(value: Any?, personId: String): Boolean = when (value) {
        is String -> value == personId
        is JsonObject -> value[KEYS.entityId] == personId
        is JsonArray<*> -> value.any { child ->
            when (child) {
                is String -> child == personId
                is JsonObject -> child[KEYS.entityId] == personId
                else -> false
            }
        }
        else -> false
    }
}
......@@ -19,63 +19,35 @@
package org.memobase.builders
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.KEYS
import org.memobase.helpers.AsciiFolder
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.rdf.NS
class PersonFacetBuilder(private val ricoType: String) : IFieldBuilder {
private val separator = "~"
private val terminator = "#"
private val level1 = "0"
private val level2 = "1"
private val isAlphaChar = Regex("[A-Za-z]")
class PersonFacetBuilder : IFieldBuilder {
private val log = LogManager.getLogger("PersonFacetBuilder")
private val personFacetValues = mutableSetOf<String>()
override fun filter(jsonObject: JsonObject, map: Map<String, JsonObject>): Boolean {
return if (jsonObject[KEYS.atType].let {
when (it) {
is String -> it == NS.rico + KEYS.Person
else -> false
}
}) {
val ids = Extract.identifier(jsonObject[KEYS.agentIsTargetOfCreationRelation])
ids
.map { map[it] }
.map { it?.get(KEYS.ricoType) }
.any { it != null && it is String && it == ricoType }
} else {
false
override fun filter(jsonObject: JsonObject): Boolean {
return jsonObject[KEYS.atType].let {
when (it) {
is String -> it == NS.rico + KEYS.Person
else -> false
}
}
}
override fun append(jsonObject: JsonObject): String {
val name = when {
jsonObject.containsKey("lastName") -> {
jsonObject["lastName"] as String
}
jsonObject.containsKey("name") -> {
jsonObject["name"] as String
}
else -> {
return "Failed to process person ${jsonObject["@id"]} for person facet, because the person does not have a name."
}
}
val displayName = jsonObject["lastName"].let { lastName ->
when (lastName) {
is String -> lastName + jsonObject["firstName"].let { if (it is String) ", $it" else "" }
else -> jsonObject["name"]
}
val result = FacetBuildHelpers.person(jsonObject)
return if (result.isNotEmpty()) {
personFacetValues.addAll(result)
""
} else {
val message = "Failed to create facet values for persons: ${jsonObject[KEYS.entityId]}."
log.warn(message)
message
}
val foldedName = AsciiFolder.foldToASCII(name)
val firstChar = foldedName.first { isAlphaChar.matches(it.toString()) }
val capitalLetter = firstChar.toUpperCase()
personFacetValues.add("$level1$separator$capitalLetter$separator$terminator")
personFacetValues.add("$level2$separator$capitalLetter$separator$displayName$separator$terminator")
return "Successfully added person to facet list."
}
override fun build(): List<String> {
......
/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.builders
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.KEYS
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.rdf.NS
/**
 * Collects the facet values of every place resource offered to it.
 *
 * Values are produced by [FacetBuildHelpers.place] and de-duplicated in a set.
 */
class PlaceFacetBuilder : IFieldBuilder {
    // Named after this class so warnings are attributed to the place builder
    // (the name was previously copied from the person builder).
    private val log = LogManager.getLogger("PlaceFacetBuilder")
    private val placeFacetValues = mutableSetOf<String>()

    /** Accepts only resources whose `@type` is the rico Place type. */
    override fun filter(jsonObject: JsonObject): Boolean =
        jsonObject[KEYS.atType] == NS.rico + KEYS.Place

    /**
     * Adds the facet values of [jsonObject] to the collection.
     *
     * @return an empty string on success, otherwise the warning message that was logged.
     */
    override fun append(jsonObject: JsonObject): String {
        val result = FacetBuildHelpers.place(jsonObject)
        return if (result.isNotEmpty()) {
            placeFacetValues.addAll(result)
            ""
        } else {
            val message = "Failed to create facet values for place: ${jsonObject[KEYS.entityId]}."
            log.warn(message)
            message
        }
    }

    /**
     * Returns the collected facet values, ordered by everything after the first
     * two characters of each value (i.e. ignoring the leading level prefix).
     */
    override fun build(): List<String> {
        return placeFacetValues.sortedBy { it.substring(2) }
    }
}
/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.helpers
import com.beust.klaxon.JsonObject
import org.memobase.KEYS
/**
 * Builds two-level facet values for persons and places.
 *
 * Every successful call yields two strings:
 * - level 0: `0~<capital letter>~#`
 * - level 1: `1~<capital letter>~<display name>~#`
 *
 * The capital letter is the first ASCII letter of the ASCII-folded name, upper-cased.
 * An empty list means no facet values could be built for the resource.
 */
object FacetBuildHelpers {
    private const val separator = "~"
    private const val terminator = "#"
    private const val level1 = "0"
    private const val level2 = "1"

    /**
     * Facet values for a person.
     *
     * The last name is preferred for the capital letter and is displayed as
     * `lastName, firstName` (first name only when present); otherwise the plain
     * name is used for both.
     *
     * @return the two facet values, or an empty list when the person has no usable
     *         string name or the name contains no ASCII letter.
     */
    fun person(jsonObject: JsonObject): List<String> {
        // Safe casts: a missing or non-string value must not abort the whole document.
        val lastName = jsonObject[KEYS.lastName] as? String
        val sortName = lastName ?: jsonObject[KEYS.name] as? String ?: return emptyList()

        val firstName = jsonObject[KEYS.firstName] as? String
        val displayName = when {
            lastName == null -> sortName
            firstName != null -> "$lastName, $firstName"
            else -> lastName
        }
        return facetValues(sortName, displayName)
    }

    /**
     * Facet values for a place, using one representative name taken from the
     * place's merged language container.
     *
     * @return the two facet values, or an empty list when the place has no name
     *         or the name contains no ASCII letter.
     */
    fun place(jsonObject: JsonObject): List<String> {
        // A missing name is reported as an empty result, like in person(), so the
        // caller can log it and carry on instead of failing the whole document.
        if (!jsonObject.containsKey(KEYS.name)) return emptyList()

        val names = Extract.languageContainer("place", jsonObject[KEYS.name])
        if (names.isEmpty()) return emptyList()

        val name = names.reduce { acc, languageContainer -> acc.merge(languageContainer) }.any()
        return facetValues(name, name)
    }

    /** Assembles both facet levels, or nothing when [sortName] has no ASCII letter. */
    private fun facetValues(sortName: String, displayName: String): List<String> {
        val capitalLetter = getCapitalLetter(sortName) ?: return emptyList()
        return listOf(
            "$level1$separator$capitalLetter$separator$terminator",
            "$level2$separator$capitalLetter$separator$displayName$separator$terminator"
        )
    }

    /** First ASCII letter of the folded [name] in upper case, or null when there is none. */
    private fun getCapitalLetter(name: String): Char? =
        AsciiFolder.foldToASCII(name)
            .firstOrNull { it in 'A'..'Z' || it in 'a'..'z' }
            ?.toUpperCase()
}
......@@ -39,4 +39,14 @@ data class LanguageContainer(
un + container.un
)
}
}
\ No newline at end of file
/**
 * Returns one representative value from this container: the first entry of the
 * first non-empty list, checked in the order `un`, `de`, `fr`, `it`.
 * Yields an empty string when all four lists are empty.
 */
fun any(): String =
    un.firstOrNull()
        ?: de.firstOrNull()
        ?: fr.firstOrNull()
        ?: it.firstOrNull()
        ?: ""
}
/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase.model
/**
 * Value holder for one person entry of a search document.
 *
 * As populated by PersonContainerBuilder:
 * @property name the person's name, merged into a single [LanguageContainer]
 * @property relation the merged name of the creation relation the person is the
 *           target of, or null when there is none
 * @property facet the facet values generated for the person
 */
data class PersonContainer(
val name: LanguageContainer,
val relation: LanguageContainer?,
val facet: List<String>
)
......@@ -47,31 +47,29 @@ data class SearchDoc(
val language: List<NameContainer>,
// Agents
val personSubject: List<LanguageContainer>,
val personCreator: List<CreatorNameContainer>,
val personContributor: List<CreatorNameContainer>,
val personPublisher: List<LanguageContainer>,
val personProducer: List<LanguageContainer>,
val persons: List<NameContainer>,
val personSubject: List<PersonContainer>,
val personCreator: List<PersonContainer>,
val personContributor: List<PersonContainer>,
val personPublisher: List<PersonContainer>,
val personProducer: List<PersonContainer>,
val personsFacet: List<String>,
val corporateBodySubject: List<LanguageContainer>,
val corporateBodyCreator: List<CreatorNameContainer>,
val corporateBodyContributor: List<CreatorNameContainer>,
val corporateBodyPublisher: List<LanguageContainer>,
val corporateBodyProducer: List<LanguageContainer>,
val corporateBodies: List<NameContainer>,
val agentSubject: List<LanguageContainer>,
val agentCreator: List<CreatorNameContainer>,
val agentContributor: List<CreatorNameContainer>,