Commit 111bf316 authored by Jonas Waeber

Finish implementation for institution transform.

Add additional deployments.
parent 3f6abbe4
Pipeline #17050 passed with stages
in 5 minutes and 22 seconds
deploymentName: pp-institution-search
outputTopic: search-doc-output-institutions
inputTopic: fedora-output-json-institutions
reportingTopic: postprocessing-reporting
mediaServerUrl: https://media.memobase.k8s.unibas.ch/memo/
\ No newline at end of file
deploymentName: pp-record-set-search
outputTopic: search-doc-output-record-sets
inputTopic: fedora-output-json-record-sets
reportingTopic: postprocessing-reporting
mediaServerUrl: https://media.memobase.k8s.unibas.ch/memo/
\ No newline at end of file
......@@ -21,27 +21,46 @@ package org.memobase
import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.*
import org.memobase.model.*
import org.memobase.helpers.Extract
import org.memobase.helpers.InstitutionTypeMapper
import org.memobase.helpers.KEYS
import org.memobase.model.InstitutionSearchDoc
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema
class InstitutionSearchDocBuilder(path: String) {
private val log = LogManager.getLogger("InstitutionSearchDocBuilder")
private val institutionTypeMapper = InstitutionTypeMapper(path)
fun transform(key: String, input: Map<String, JsonObject>): Schema {
val institution = input["institution"] ?: throw InvalidInputException("No institution entity found in message $key.")
val identifiers = Filter.entitiesByProperty(KEYS.identifiedBy, institution, input)
val locations = Filter.entitiesByProperty(KEYS.hasLocation, institution, input)
val cantons = mutableListOf<JsonObject>()
val institution =
input["institution"] ?: throw InvalidInputException("No institution entity found in message $key.")
val identifiers = mutableListOf<JsonObject>()
val cantons = mutableListOf<LanguageContainer>()
val municipalities = mutableListOf<LanguageContainer>()
val addresses = mutableListOf<String>()
val postalCodes = mutableListOf<String>()
input.values.forEach {
if (it[KEYS.ricoType] == KEYS.LocationType.canton) {
cantons.add(it)
when {
it[KEYS.ricoType] == KEYS.LocationType.canton -> {
cantons.add(extractAdminUnit(it, "canton", "NoCantonFound"))
}
it[KEYS.ricoType] == KEYS.LocationType.municipality -> {
municipalities.add(extractAdminUnit(it, "municipality", "NoCityFound"))
}
it[KEYS.ricoType] == KEYS.IdentifierType.main -> {
identifiers.add(it)
}
it[KEYS.atType] == KEYS.Place -> {
addresses.addAll(Extract.listOfStrings(it[KEYS.wikidataAddresses]))
postalCodes.addAll(Extract.listOfStrings(it[KEYS.wikidataPostalCodes]))
}
}
}
val type = institution[KEYS.wikidataInstance].let {
when(it) {
when (it) {
is String -> listOf(institutionTypeMapper.getValue(it))
is JsonArray<*> -> it.map { any -> institutionTypeMapper.getValue(any as String) }
else -> {
......@@ -51,32 +70,8 @@ class InstitutionSearchDocBuilder(path: String) {
}
}
val name = Extract.languageContainer("institution", institution[KEYS.name]).let { names ->
when {
names.isEmpty() -> {
LanguageContainer.placeholder("NoNameFound")
}
names.size == 1 -> {
names[0]
}
else -> {
names.reduce { acc, languageContainer -> acc.merge(languageContainer) }
}
}
}
val canton = cantons.map {
it[KEYS.name].let { name ->
Extract.languageContainer("canton", name).reduce { acc, languageContainer -> acc.merge(languageContainer) }
}
}.let { c ->
when {
c.isEmpty() -> {
listOf(LanguageContainer.placeholder("NoCantonNameFound"))
}
else -> c
}
}
val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound")
val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound")
return InstitutionSearchDoc(
institutionId = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound",
......@@ -91,11 +86,42 @@ class InstitutionSearchDocBuilder(path: String) {
},
type = type,
name = name,
description = description,
documentType = listOf(LanguageContainer.placeholder("PLACEHOLDER")),
keyVisualLink = "placeholderlink",
canton = canton,
numberOfRecordSets = 0,
keyVisualLink = institution[KEYS.wikidataImage].let { if (it != null) it as String else "NoKeyVisualLinkDefined" },
canton = cantons,
city = municipalities,
address = addresses,
postalCodes = postalCodes,
numberOfRecordSets = Extract.listOfStrings(institution[KEYS.isHolderOf]).count(),
numberOfDocuments = 0
)
}
/**
 * Reduces the language containers extracted from [value] to a single [LanguageContainer].
 *
 * Extraction is delegated to `Extract.languageContainer`, always called with
 * "institution" as its first argument.
 *
 * @param value raw property value taken from the institution entity; may be null.
 * @param placeholder text handed to `LanguageContainer.placeholder` when nothing was extracted.
 * @return the placeholder container when the extraction is empty, the sole container when
 * exactly one was extracted, otherwise all containers folded left-to-right with `merge`.
 */
private fun extractLanguageContainer(value: Any?, placeholder: String): LanguageContainer {
    val containers = Extract.languageContainer("institution", value)
    // Nothing extracted: fall back to the caller-supplied placeholder.
    if (containers.isEmpty()) {
        return LanguageContainer.placeholder(placeholder)
    }
    // A single container needs no merging.
    if (containers.size == 1) {
        return containers[0]
    }
    return containers.reduce { merged, next -> merged.merge(next) }
}
/**
 * Builds one [LanguageContainer] from the `KEYS.name` property of [item].
 *
 * @param item JSON object whose name property is read.
 * @param parent forwarded unchanged as the first argument of `Extract.languageContainer`.
 * @param placeholder text handed to `LanguageContainer.placeholder` when no name was extracted.
 * @return the placeholder container when the extraction is empty, otherwise every extracted
 * container folded left-to-right with `merge`.
 */
private fun extractAdminUnit(item: JsonObject, parent: String, placeholder: String): LanguageContainer {
    val labels = Extract.languageContainer(parent, item[KEYS.name])
    return if (labels.isEmpty()) {
        LanguageContainer.placeholder(placeholder)
    } else {
        labels.reduce { merged, next -> merged.merge(next) }
    }
}
}
......@@ -8,7 +8,11 @@ object Default {
"UnknownId",
false,
LanguageContainer.EMPTY,
listOf(LanguageContainer.EMPTY),
LanguageContainer.EMPTY,
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
"",
......
......@@ -23,8 +23,6 @@ object KEYS {
const val mediaUrl = "media.url"
const val institutionTypeLabelsPath = "institutionTypeLabelsPath"
}
const val entityId = "@id"
const val atType = "@type"
const val isPublished = "isPublished"
......@@ -35,6 +33,8 @@ object KEYS {
const val name = "name"
const val title = "title"
const val prefLabel = "prefLabel"
const val descriptiveNote = "descriptiveNote"
const val isHolderOf = "isHolderOf"
const val agentIsTargetOfCreationRelation = "agentIsTargetOfCreationRelation"
const val hasSubject = "hasSubject"
......@@ -73,6 +73,9 @@ object KEYS {
const val wikidataInstance = "P31"
const val wikidataAddresses = "P669"
const val wikidataPostalCodes = "P281"
const val wikidataImage = "P18"
const val missingLabelDe = "FEHLENDES LABEL"
const val missingLabelFr = "L'ÉTIQUETTE MANQUANTE"
......
......@@ -6,8 +6,13 @@ import com.fasterxml.jackson.annotation.JsonInclude
data class InstitutionSearchDoc(
val institutionId: String,
val published: Boolean,
// Full Text Search
val name: LanguageContainer,
val description: LanguageContainer,
val city: List<LanguageContainer>,
val address: List<String>,
val postalCodes: List<String>,
// Facettes
val canton: List<LanguageContainer>,
......
......@@ -4,6 +4,8 @@
"@id": "https://memobase.ch/institution/RadioX",
"@type": "https://www.ica.org/standards/RiC/ontology#CorporateBody",
"eventType": "Create",
"http://memobase.ch/internal/isPublished": true,
"P18": "https://mb-wf1.memobase.unibas.ch/sites/default/files/styles/teaser/public/2020-10/1.jpg?itok=5ncVBnVQ",
"P2699": "http://online-archiv-radiox.com",
"P31": [
"http://www.wikidata.org/entity/Q327333",
......@@ -26,13 +28,14 @@
}
],
"hasLocation": [
"https://memobase.ch/institution/RadioX#genid3e44aabe-29bf-45cc-ae00-372817c7b84d",
"https://memobase.ch/institution/RadioX#genidb4d72c27-a269-4ef6-8693-c24661be214f"
"https://memobase.ch/institution/RadioX#genid046fabe7-88c0-4bb8-9b9a-92ef2f1847a6",
"https://memobase.ch/institution/RadioX#genid3b8acad3-2286-4e9f-a509-a979df1c586d"
],
"identifiedBy": [
"https://memobase.ch/institution/RadioX#genid87a391fc-a46f-4811-a5b5-7cae8447d929",
"https://memobase.ch/institution/RadioX#genid87dcf55a-a587-46b2-a69a-f85e638ff94f"
"https://memobase.ch/institution/RadioX#genid59243c70-e645-4d08-8366-621fe49c7d4a",
"https://memobase.ch/institution/RadioX#genidbea9d850-ce35-4162-adcf-3e90b8034392"
],
"isHolderOf": "https://memobase.ch/recordSet/RadioX-Kampagnen",
"name": [
{
"@language": "de",
......@@ -50,7 +53,35 @@
"type": "memobaseInstitution"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid3287b9c5-4efe-499d-9384-2a1059ae9ddf",
"@id": "https://memobase.ch/institution/RadioX#genid046fabe7-88c0-4bb8-9b9a-92ef2f1847a6",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"P131": [
"https://memobase.ch/institution/RadioX#genidf0ed5a81-95d5-479c-9427-ddd4407d8dd7",
"https://memobase.ch/institution/RadioX#genid4b67068f-c28e-4aab-8887-aade59ea8194"
],
"P17": "http://www.wikidata.org/entity/Q39",
"P281": "3005",
"P625": "46.9531243, 7.434256",
"P6375": "Helvetiapl. 5",
"P669": "Helvetiapl.",
"P670": "5"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid3b8acad3-2286-4e9f-a509-a979df1c586d",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"P131": [
"https://memobase.ch/institution/RadioX#genid3dc5d53d-0f0c-4c2b-845b-8721f6f805f6",
"https://memobase.ch/institution/RadioX#genida1db2644-261b-4425-ad12-590ea152ddd4"
],
"P17": "http://www.wikidata.org/entity/Q39",
"P281": "4142",
"P625": "47.5602571, 7.5804113",
"P6375": "Oslo-Strasse 8",
"P669": "Oslo-Strasse",
"P670": "8"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid3dc5d53d-0f0c-4c2b-845b-8721f6f805f6",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"sameAs": "http://www.wikidata.org/entity/Q69030",
"name": [
......@@ -70,21 +101,7 @@
"type": "municipality"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid3e44aabe-29bf-45cc-ae00-372817c7b84d",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"P131": [
"https://memobase.ch/institution/RadioX#genid74e3ce4c-42fb-4af6-8380-a2fd324c5342",
"https://memobase.ch/institution/RadioX#genid40fd3abd-77ae-47e9-9c07-c3e894db5210"
],
"P17": "http://www.wikidata.org/entity/Q39",
"P281": "3005",
"P625": "46.9531243, 7.434256",
"P6375": "Helvetiapl. 5",
"P669": "Helvetiapl.",
"P670": "5"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid40fd3abd-77ae-47e9-9c07-c3e894db5210",
"@id": "https://memobase.ch/institution/RadioX#genid4b67068f-c28e-4aab-8887-aade59ea8194",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"sameAs": "http://www.wikidata.org/entity/Q11911",
"name": [
......@@ -104,13 +121,13 @@
"type": "canton"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid74e3ce4c-42fb-4af6-8380-a2fd324c5342",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"name": "Bern",
"type": "municipality"
"@id": "https://memobase.ch/institution/RadioX#genid59243c70-e645-4d08-8366-621fe49c7d4a",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "test_radio_x",
"type": "oldMemobase"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid7558b5df-79a2-49bd-987d-a7ccb938a232",
"@id": "https://memobase.ch/institution/RadioX#genida1db2644-261b-4425-ad12-590ea152ddd4",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"sameAs": "http://www.wikidata.org/entity/Q12172",
"name": [
......@@ -130,33 +147,25 @@
"type": "canton"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid87a391fc-a46f-4811-a5b5-7cae8447d929",
"@id": "https://memobase.ch/institution/RadioX#genidbea9d850-ce35-4162-adcf-3e90b8034392",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "RadioX",
"type": "main"
},
{
"@id": "https://memobase.ch/institution/RadioX#genid87dcf55a-a587-46b2-a69a-f85e638ff94f",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "test_radio_x",
"type": "oldMemobase"
},
{
"@id": "https://memobase.ch/institution/RadioX#genidb4d72c27-a269-4ef6-8693-c24661be214f",
"@id": "https://memobase.ch/institution/RadioX#genidf0ed5a81-95d5-479c-9427-ddd4407d8dd7",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"P131": [
"https://memobase.ch/institution/RadioX#genid3287b9c5-4efe-499d-9384-2a1059ae9ddf",
"https://memobase.ch/institution/RadioX#genid7558b5df-79a2-49bd-987d-a7ccb938a232"
],
"P17": "http://www.wikidata.org/entity/Q39",
"P281": "4142",
"P625": "47.5602571, 7.5804113",
"P6375": "Oslo-Strasse 8",
"P669": "Oslo-Strasse",
"P670": "8"
"name": "Bern",
"type": "municipality"
}
],
"@context": {
"type": {
"@id": "https://www.ica.org/standards/RiC/ontology#type"
},
"name": {
"@id": "https://www.ica.org/standards/RiC/ontology#name"
},
"P625": {
"@id": "http://www.wikidata.org/prop/direct/P625"
},
......@@ -183,18 +192,16 @@
"sameAs": {
"@id": "http://schema.org/sameAs"
},
"type": {
"@id": "https://www.ica.org/standards/RiC/ontology#type"
},
"name": {
"@id": "https://www.ica.org/standards/RiC/ontology#name"
},
"identifier": {
"@id": "https://www.ica.org/standards/RiC/ontology#identifier"
},
"P2699": {
"@id": "http://www.wikidata.org/prop/direct/P2699"
},
"hasLocation": {
"@id": "https://www.ica.org/standards/RiC/ontology#hasLocation",
"@type": "@id"
},
"descriptiveNote": {
"@id": "https://www.ica.org/standards/RiC/ontology#descriptiveNote"
},
......@@ -206,18 +213,24 @@
"@id": "https://www.ica.org/standards/RiC/ontology#identifiedBy",
"@type": "@id"
},
"hasLocation": {
"@id": "https://www.ica.org/standards/RiC/ontology#hasLocation",
"@type": "@id"
},
"P968": {
"@id": "http://www.wikidata.org/prop/direct/P968"
},
"P18": {
"@id": "http://www.wikidata.org/prop/direct/P18"
},
"eventType": {
"@id": "http://memobase.ch/internal/eventType"
},
"P856": {
"@id": "http://www.wikidata.org/prop/direct/P856"
},
"isHolderOf": {
"@id": "https://www.ica.org/standards/RiC/ontology#isHolderOf"
},
"isPublished": {
"@id": "http://memobase.ch/internal/isPublished",
"@type": "http://www.w3.org/2001/XMLSchema#boolean"
}
}
}
\ No newline at end of file
{"institutionId":"RadioX","published":false,"name":{"de":["RadioX"],"fr":["Test institution Oana (FR)"],"it":["Test institution Oana (IT)"],"un":[]},"canton":[{"de":["Bern"],"fr":["Berne"],"it":["Berne"],"un":[]},{"de":["Basel-Stadt"],"fr":["Bâle-Ville"],"it":["Bâle-Ville"],"un":[]}],"type":[{"de":["Behörde"],"fr":["autorité"],"it":["autorità │"],"un":[]},{"de":["Archiv"],"fr":["Archives"],"it":["Archivio │"],"un":[]}],"documentType":[{"de":["PLACEHOLDER"],"fr":["PLACEHOLDER"],"it":["PLACEHOLDER"],"un":["PLACEHOLDER"]}],"keyVisualLink":"placeholderlink","numberOfRecordSets":0,"numberOfDocuments":0,"id":"RadioX"}
\ No newline at end of file
{"institutionId":"RadioX","published":false,"name":{"de":["RadioX"],"fr":["Test institution Oana (FR)"],"it":["Test institution Oana (IT)"],"un":[]},"description":{"de":["<p>Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem.</p>"],"fr":["<p>test_description&nbsp;(FR)</p>"],"it":["<p>test_description&nbsp;&nbsp;(IT)</p>"],"un":[]},"city":[{"de":["Münchenstein"],"fr":["Münchenstein"],"it":["Münchenstein"],"un":[]},{"de":[],"fr":[],"it":[],"un":["Bern"]}],"canton":[{"de":["Bern"],"fr":["Berne"],"it":["Berne"],"un":[]},{"de":["Basel-Stadt"],"fr":["Bâle-Ville"],"it":["Bâle-Ville"],"un":[]}],"type":[{"de":["Behörde"],"fr":["autorité"],"it":["autorità │"],"un":[]},{"de":["Archiv"],"fr":["Archives"],"it":["Archivio │"],"un":[]}],"documentType":[{"de":["PLACEHOLDER"],"fr":["PLACEHOLDER"],"it":["PLACEHOLDER"],"un":["PLACEHOLDER"]}],"keyVisualLink":"https://mb-wf1.memobase.unibas.ch/sites/default/files/styles/teaser/public/2020-10/1.jpg?itok=5ncVBnVQ","numberOfRecordSets":1,"numberOfDocuments":0,"id":"RadioX"}
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment