Commit 0e5460f2 authored by Jonas Waeber
Browse files

[WIP] Update record sets.

parent 3d631a1b
Pipeline #21672 failed with stages
in 2 minutes and 28 seconds
...@@ -34,7 +34,8 @@ ext { ...@@ -34,7 +34,8 @@ ext {
dependencies { dependencies {
compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.6.1' compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.6.1'
implementation 'org.memobase:memobase-service-utilities:2.0.5' implementation 'org.memobase:memobase-service-utilities:2.0.9'
implementation 'org.apache.jena:apache-jena:3.14.0'
// Logging Framework // Logging Framework
implementation "org.apache.logging.log4j:log4j-api:${log4jV}" implementation "org.apache.logging.log4j:log4j-api:${log4jV}"
......
...@@ -18,12 +18,16 @@ ...@@ -18,12 +18,16 @@
package org.memobase package org.memobase
import ch.memobase.rdf.DC
import ch.memobase.rdf.RDA
import ch.memobase.rdf.RDF
import ch.memobase.rdf.RICO
import com.beust.klaxon.JsonObject import com.beust.klaxon.JsonObject
import java.lang.NumberFormatException
import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract import org.memobase.helpers.Extract
import org.memobase.helpers.JSON
import org.memobase.helpers.KEYS import org.memobase.helpers.KEYS
import org.memobase.model.FacetContainer import org.memobase.model.FacetContainer
import org.memobase.model.IntegerRange import org.memobase.model.IntegerRange
...@@ -33,46 +37,80 @@ import org.memobase.model.Schema ...@@ -33,46 +37,80 @@ import org.memobase.model.Schema
class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchWrapper) { class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchWrapper) {
private val log = LogManager.getLogger("RecordSetSearchDocBuilder") private val log = LogManager.getLogger(this::class.java)
fun transform(key: String, input: Map<String, JsonObject>): Schema { fun transform(key: String, input: Map<String, JsonObject>): Schema {
val recordSet = val recordSet =
input["recordSet"] ?: throw InvalidInputException("No recordSet entity found in message $key.") input[JSON.recordSetTag] ?: throw InvalidInputException("No record set entity found in message $key.")
val publicationIds = Extract.identifiers(recordSet[RICO.isSubjectOf.localName])
val relatedRecordSetIds = Extract.identifiers(recordSet[RICO.isRecordResourceAssociatedWithRecordResource.localName])
val metadataLanguages = mutableListOf<JsonObject>() val metadataLanguages = mutableListOf<JsonObject>()
var originalTitles = LanguageContainer.EMPTY
var projectTitles = LanguageContainer.EMPTY
var relatedRecordSets = LanguageContainer.EMPTY
var publicationTitles = LanguageContainer.EMPTY
var relatedDocumentTitles = LanguageContainer.EMPTY
input.values.forEach { input.values.forEach {
when { when {
it[KEYS.ricoType] == KEYS.LanguageType.metadata -> { it[RICO.type.localName] == KEYS.LanguageType.metadata -> {
metadataLanguages.add(it) metadataLanguages.add(it)
} }
it[KEYS.atType] == RICO.Title.uri &&
it[RICO.type.localName] == KEYS.TitleTypes.original -> {
originalTitles = originalTitles.add(it[RICO.title.localName])
}
it[KEYS.atType] == RICO.CorporateBody.uri &&
it[RICO.type.localName] == KEYS.CorporateBodyType.memoriavProject -> {
projectTitles = projectTitles.add(it[RICO.title.localName])
}
it[KEYS.atType] == RICO.RecordSet.uri &&
relatedRecordSetIds.contains(it[KEYS.entityId]) -> {
relatedRecordSets = relatedRecordSets.add(it[RICO.title.localName])
}
it[KEYS.atType] == RICO.Record.uri -> {
if (publicationIds.contains(it[KEYS.entityId])) {
publicationTitles = publicationTitles.add(it[RICO.title.localName])
} else {
relatedDocumentTitles = relatedDocumentTitles.add(it[RICO.title.localName])
}
}
} }
} }
val name = extractLanguageContainer(recordSet[KEYS.title], "NoNameFound") val name = extractLanguageContainer(recordSet[RICO.title.localName], "")
val description = extractLanguageContainer(recordSet[KEYS.descriptiveNote], "NoDescriptionFound") val dates = Extract.identifiers(recordSet[RICO.isAssociatedWithDate.localName]).mapNotNull {
val dates = Extract.identifiers(recordSet[KEYS.isAssociatedWithDate]).mapNotNull {
input[it] input[it]
}.map { }.map {
it[KEYS.normalizedDateValue] as String it[RICO.normalizedDateValue.localName] as String
} }
val date = if (dates.isNotEmpty()) { val date = if (dates.isNotEmpty()) {
try { try {
val splitDate = dates[0].split("/") val splitDate = dates[0].split("/")
if (splitDate.size == 2) { if (splitDate.size == 2) {
IntegerRange(splitDate[0].toInt(), splitDate[1].toInt()) IntegerRange(splitDate[0].toInt(), splitDate[1].toInt())
} } else
else
IntegerRange(splitDate[0].toInt(), splitDate[0].toInt()) IntegerRange(splitDate[0].toInt(), splitDate[0].toInt())
} catch (ex: NumberFormatException) { } catch (ex: NumberFormatException) {
IntegerRange(3000, 3001) null
} }
} else { } else {
IntegerRange(3000, 3001) null
} }
val uri = recordSet[KEYS.entityId] as String val uri = recordSet[KEYS.entityId] as String
val id = uri.substringAfterLast("/") val id = uri.substringAfterLast("/")
val institution = recordSet[KEYS.heldBy] as String val institution = recordSet[RICO.heldBy.localName] as String
val institutionId = institution.substringAfterLast("/") val institutionId = institution.substringAfterLast("/")
val description = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
val rights = extractLanguageContainer(recordSet[RICO.conditionsOfUse.localName], "")
val access = extractLanguageContainer(recordSet[RICO.conditionsOfAccess.localName], "")
val accessMemobase = extractLanguageContainer(recordSet[RDA.hasRestrictionOnAccess.localName], "")
val history = extractLanguageContainer(recordSet[RICO.history.localName], "")
val integrity = extractLanguageContainer(recordSet[RICO.integrity.localName], "")
val extent = extractLanguageContainer(recordSet[RICO.recordResourceExtent.localName], "")
val scopeAndContent = extractLanguageContainer(recordSet[RICO.scopeAndContent.localName], "")
val conformsTo = extractLanguageContainer(recordSet[DC.conformsTo.localName], "")
val dataImport = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
return RecordSetSearchDoc( return RecordSetSearchDoc(
recordSetId = id, recordSetId = id,
...@@ -86,7 +124,6 @@ class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchW ...@@ -86,7 +124,6 @@ class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchW
} }
} }
}, },
scopeAndContent = description,
periodOfTimeAsYear = date, periodOfTimeAsYear = date,
institution = elasticSearchWrapper.getInstitutionName(institutionId), institution = elasticSearchWrapper.getInstitutionName(institutionId),
supportedByMemoriav = recordSet[KEYS.sponsoredBy] != null, supportedByMemoriav = recordSet[KEYS.sponsoredBy] != null,
...@@ -107,7 +144,22 @@ class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchW ...@@ -107,7 +144,22 @@ class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchW
}, },
emptyList() emptyList()
) )
} },
scopeAndContent = scopeAndContent,
accessMemobase = accessMemobase,
context = history,
originalTitle = originalTitles,
extent = extent,
selection = integrity,
indexing = conformsTo,
rights = rights,
description = description,
access = access,
project = projectTitles,
relatedRecordSets = relatedRecordSets,
relatedPublications = publicationTitles,
relatedDocuments = relatedDocumentTitles,
dataImport = dataImport
) )
} }
......
...@@ -119,12 +119,12 @@ object Extract { ...@@ -119,12 +119,12 @@ object Extract {
fun identifiers(value: Any?): List<String> { fun identifiers(value: Any?): List<String> {
return when (value) { return when (value) {
is String -> listOf(value) is String -> listOf(value)
is JsonObject -> value[KEYS.atType].let { if (it is String) listOf(it) else emptyList() } is JsonObject -> value[KEYS.entityId].let { if (it is String) listOf(it) else emptyList() }
is JsonArray<*> -> is JsonArray<*> ->
value.mapNotNull { item -> value.mapNotNull { item ->
when (item) { when (item) {
is String -> item is String -> item
is JsonObject -> value[KEYS.atType].let { id: Any? -> is JsonObject -> value[KEYS.entityId].let { id: Any? ->
if (id is String) if (id is String)
id id
else null else null
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
package org.memobase.helpers package org.memobase.helpers
import ch.memobase.rdf.NS import ch.memobase.rdf.NS
import ch.memobase.rdf.RICO
import com.beust.klaxon.JsonArray import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject import com.beust.klaxon.JsonObject
import com.beust.klaxon.Klaxon import com.beust.klaxon.Klaxon
...@@ -33,7 +34,6 @@ object JSON { ...@@ -33,7 +34,6 @@ object JSON {
private const val Record = "Record" private const val Record = "Record"
private const val RecordSet = "RecordSet" private const val RecordSet = "RecordSet"
private const val CorporateBody = "CorporateBody" private const val CorporateBody = "CorporateBody"
private const val memobaseInstitutionType = "memobaseInstitution"
const val institutionTag = "institution" const val institutionTag = "institution"
const val recordTag = "record" const val recordTag = "record"
const val recordSetTag = "recordSet" const val recordSetTag = "recordSet"
...@@ -52,11 +52,11 @@ object JSON { ...@@ -52,11 +52,11 @@ object JSON {
fun unpack(input: JsonObject): Map<String, JsonObject> { fun unpack(input: JsonObject): Map<String, JsonObject> {
val graph = input[graph] as JsonArray<JsonObject> val graph = input[graph] as JsonArray<JsonObject>
return graph.map { return graph.map {
if (it[KEYS.atType] == NS.rico + Record) { if (it[KEYS.atType] == RICO.Record.uri) {
Pair(recordTag, it) Pair(recordTag, it)
} else if (it[KEYS.atType] == NS.rico + RecordSet) { } else if (it[KEYS.atType] == RICO.RecordSet.uri) {
Pair(recordSetTag, it) Pair(recordSetTag, it)
} else if (it[KEYS.atType] == NS.rico + CorporateBody && it[KEYS.ricoType] == memobaseInstitutionType) { } else if (it[KEYS.atType] == RICO.CorporateBody.uri && it[KEYS.ricoType] == KEYS.CorporateBodyType.memobaseInstitution) {
Pair(institutionTag, it) Pair(institutionTag, it)
} else { } else {
Pair(it[KEYS.entityId] as String, it) Pair(it[KEYS.entityId] as String, it)
......
...@@ -35,9 +35,8 @@ object KEYS { ...@@ -35,9 +35,8 @@ object KEYS {
const val institutionIndex = "elastic.institutionIndex" const val institutionIndex = "elastic.institutionIndex"
} }
const val conditionsOfUse = "conditionsOfUse"
const val isAssociatedWithDate = "isAssociatedWithDate"
const val normalizedDateValue = "normalizedDateValue"
const val sameAs = "sameAs" const val sameAs = "sameAs"
const val entityId = "@id" const val entityId = "@id"
const val atType = "@type" const val atType = "@type"
...@@ -48,6 +47,8 @@ object KEYS { ...@@ -48,6 +47,8 @@ object KEYS {
const val teaserColorComputed = "teaserColorComputed" const val teaserColorComputed = "teaserColorComputed"
// Namespace rico: // Namespace rico:
const val scopeAndContent = "scopeAndContent"
const val recordResourceExtent = "recordResourceExtent"
const val ricoType = "type" const val ricoType = "type"
const val firstName = "firstName" const val firstName = "firstName"
const val lastName = "lastName" const val lastName = "lastName"
...@@ -65,6 +66,13 @@ object KEYS { ...@@ -65,6 +66,13 @@ object KEYS {
const val identifiedBy = "identifiedBy" const val identifiedBy = "identifiedBy"
const val hasSubject = "hasSubject" const val hasSubject = "hasSubject"
const val hasLocation = "hasLocation" const val hasLocation = "hasLocation"
const val integrity = "integrity"
const val history = "history"
const val conditionsOfAccess = "conditionsOfAccess"
const val conditionsOfUse = "conditionsOfUse"
const val isAssociatedWithDate = "isAssociatedWithDate"
const val normalizedDateValue = "normalizedDateValue"
// rico classes // rico classes
const val Person = "Person" const val Person = "Person"
...@@ -98,7 +106,7 @@ object KEYS { ...@@ -98,7 +106,7 @@ object KEYS {
const val issued = "issued" const val issued = "issued"
const val created = "created" const val created = "created"
const val temporal = "temporal" const val temporal = "temporal"
const val conformsTo = "conformsTo"
// namespace wdt: // namespace wdt:
const val wikidataInstance = "P31" const val wikidataInstance = "P31"
...@@ -121,6 +129,7 @@ object KEYS { ...@@ -121,6 +129,7 @@ object KEYS {
} }
object TitleTypes { object TitleTypes {
const val original = "original"
const val main = "main" const val main = "main"
const val series = "series" const val series = "series"
const val broadcast = "broadcast" const val broadcast = "broadcast"
...@@ -135,7 +144,10 @@ object KEYS { ...@@ -135,7 +144,10 @@ object KEYS {
object LocationType { object LocationType {
const val canton = "canton" const val canton = "canton"
const val municipality = "municipality" const val municipality = "municipality"
}
object CorporateBodyType {
const val memobaseInstitution = "memobaseInstitution" const val memobaseInstitution = "memobaseInstitution"
const val memobaseProject = "memobaseProject" const val memoriavProject = "memoriavProject"
} }
} }
...@@ -23,18 +23,13 @@ import org.memobase.helpers.KEYS ...@@ -23,18 +23,13 @@ import org.memobase.helpers.KEYS
@JsonInclude(JsonInclude.Include.NON_NULL) @JsonInclude(JsonInclude.Include.NON_NULL)
data class LanguageContainer( data class LanguageContainer(
val de: List<String>, val de: List<String> = emptyList(),
val fr: List<String>, val fr: List<String> = emptyList(),
val it: List<String>, val it: List<String> = emptyList(),
val un: List<String> // if the language is not known val un: List<String> = emptyList() // if the language is not known
) { ) {
companion object { companion object {
val EMPTY = LanguageContainer( val EMPTY = LanguageContainer()
emptyList(),
emptyList(),
emptyList(),
emptyList()
)
val DEFAULT = LanguageContainer( val DEFAULT = LanguageContainer(
listOf(KEYS.missingLabelDe), listOf(KEYS.missingLabelDe),
listOf(KEYS.missingLabelFr), listOf(KEYS.missingLabelFr),
...@@ -59,6 +54,57 @@ data class LanguageContainer( ...@@ -59,6 +54,57 @@ data class LanguageContainer(
else -> EMPTY else -> EMPTY
} }
} }
const val valueJsonLDFieldName = "@value"
const val languageJsonLDFieldName = "@language"
}
/**
 * Returns a new container holding this container's values plus the text(s) found in [value].
 *
 * Accepted inputs:
 * - `null`: nothing to add, this container is returned unchanged.
 * - [String]: appended to [un] (language not known).
 * - [Map]: treated as a JSON-LD value object and sorted by its language tag.
 * - [List]: every [String] or [Map] element is added in order; other elements are ignored.
 * - anything else: ignored, this container is returned unchanged.
 *
 * @param value the raw JSON value of a property, may be null.
 * @return the extended container; the receiver is never mutated.
 */
fun add(value: Any?): LanguageContainer {
    return when (value) {
        null -> this
        is String ->
            LanguageContainer(
                de, fr, it, un + value
            )
        is Map<*, *> -> {
            value as Map<String, Any?>
            mapRdfLanguageTags(value)
        }
        is List<*> ->
            // Fold starting from this container so that an empty list is a no-op (reduce would
            // throw) and the values already collected are carried over exactly once, no matter
            // whether the list holds plain strings, value objects or a mix of both.
            value.fold(this) { acc, item ->
                when (item) {
                    is String, is Map<*, *> -> acc.add(item)
                    else -> acc
                }
            }
        else -> this
    }
}
/**
 * Builds a copy of this container extended by a single JSON-LD value object.
 *
 * The text is read from the "@value" entry and the language from the "@language" entry of
 * [value]; both are only used when they are strings. The text is appended to [de], [fr] or
 * [it] when the language matches, and to [un] when no language is given. A value with any
 * other language tag, or without a string "@value", leaves all four lists as they are.
 *
 * @param value the JSON-LD value object.
 * @return a new container; the receiver is not modified.
 */
private fun mapRdfLanguageTags(value: Map<String, Any?>): LanguageContainer {
    val text = value[valueJsonLDFieldName] as? String
    val tag = value[languageJsonLDFieldName] as? String

    // Without a usable text there is nothing to append, whatever the language says.
    if (text == null) {
        return LanguageContainer(de, fr, it, un)
    }

    return when (tag) {
        "de" -> LanguageContainer(de + text, fr, it, un)
        "fr" -> LanguageContainer(de, fr + text, it, un)
        "it" -> LanguageContainer(de, fr, it + text, un)
        null -> LanguageContainer(de, fr, it, un + text)
        // Any other language tag is not represented in this container.
        else -> LanguageContainer(de, fr, it, un)
    }
}
fun toList(): List<String> { fun toList(): List<String> {
......
...@@ -25,28 +25,43 @@ import com.fasterxml.jackson.annotation.JsonInclude ...@@ -25,28 +25,43 @@ import com.fasterxml.jackson.annotation.JsonInclude
data class RecordSetSearchDoc( data class RecordSetSearchDoc(
@JsonIgnore @JsonIgnore
val recordSetId: String, val recordSetId: String,
val name: LanguageContainer,
val isPublished: Boolean, val isPublished: Boolean,
val name: LanguageContainer,
// Display
val keyVisualLink: String,
val numberOfDocuments: Int,
// Facets // Facets
val documentType: List<FacetContainer>, val documentType: List<FacetContainer>,
val periodOfTimeAsYear: IntegerRange?,
val supportedByMemoriav: Boolean, val supportedByMemoriav: Boolean,
val languageOfMetadata: List<FacetContainer>, val languageOfMetadata: List<FacetContainer>,
val institution: FacetContainer, val institution: FacetContainer,
val periodOfTimeAsYear: IntegerRange,
// Sort // Sort
val lastUpdatedDate: String, val lastUpdatedDate: String,
// Search // Search
val scopeAndContent: LanguageContainer, val scopeAndContent: LanguageContainer?,
val accessMemobase: LanguageContainer?,
// Display val context: LanguageContainer?,
val keyVisualLink: String, val originalTitle: LanguageContainer?,
val numberOfDocuments: Int val extent: LanguageContainer?,
val selection: LanguageContainer?,
val indexing: LanguageContainer?,
val rights: LanguageContainer?,
val description: LanguageContainer?,
val access: LanguageContainer?,
val project: LanguageContainer?,
val relatedRecordSets: LanguageContainer?,
val relatedPublications: LanguageContainer?,
val relatedDocuments: LanguageContainer?,
val dataImport: LanguageContainer?
) : Schema(recordSetId) { ) : Schema(recordSetId) {
companion object { companion object {
const val lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
const val placeholderTitle = "Placeholder Title"
val DEFAULT = RecordSetSearchDoc( val DEFAULT = RecordSetSearchDoc(
recordSetId = "NoRecordSetId", recordSetId = "NoRecordSetId",
name = LanguageContainer.placeholder("TEST_RECORD_SET"), name = LanguageContainer.placeholder("TEST_RECORD_SET"),
...@@ -68,10 +83,26 @@ data class RecordSetSearchDoc( ...@@ -68,10 +83,26 @@ data class RecordSetSearchDoc(
facet = emptyList() facet = emptyList()
), ),
periodOfTimeAsYear = IntegerRange(2000, 2020), periodOfTimeAsYear = IntegerRange(2000, 2020),
scopeAndContent = LanguageContainer.placeholder("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."),
lastUpdatedDate = "2020-11-20T10:29:01.128", lastUpdatedDate = "2020-11-20T10:29:01.128",
keyVisualLink = "https://mb-wf1.memobase.unibas.ch/sites/default/files/styles/teaser/public/2020-10/1.jpg?itok=5ncVBnVQ", keyVisualLink = "https://mb-wf1.memobase.unibas.ch/sites/default/files/styles/teaser/public/2020-10/1.jpg?itok=5ncVBnVQ",
numberOfDocuments = 100 numberOfDocuments = 100,
scopeAndContent = LanguageContainer.placeholder(lorem),
accessMemobase = LanguageContainer.placeholder(lorem),
context = LanguageContainer.placeholder(lorem),
originalTitle = LanguageContainer.placeholder(placeholderTitle),
extent = LanguageContainer.placeholder(lorem),
selection = LanguageContainer.placeholder(lorem),
indexing = LanguageContainer.placeholder(lorem),
rights = LanguageContainer.placeholder(lorem),
description = LanguageContainer.placeholder(lorem),
access = LanguageContainer.placeholder(lorem),
project = LanguageContainer.placeholder(placeholderTitle),
relatedRecordSets = LanguageContainer.placeholder(placeholderTitle),
relatedPublications = LanguageContainer.placeholder(placeholderTitle),
relatedDocuments = LanguageContainer.placeholder(placeholderTitle),
dataImport = LanguageContainer.placeholder(lorem)
) )
} }
} }
\ No newline at end of file
package org.memobase package org.memobase
import ch.memobase.reporting.Report import ch.memobase.reporting.Report
import ch.memobase.reporting.ReportStatus
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.registerKotlinModule import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import io.mockk.every import io.mockk.every
...@@ -43,33 +44,33 @@ class TestRecordSetSearchDoc { ...@@ -43,33 +44,33 @@ class TestRecordSetSearchDoc {
@Test @Test
fun `transform record set search doc`() { fun `transform record set search doc`() {
val settings = App.createSettings("kafkaTest1.yml") val settings = App.createSettings("kafkaTest1.yml")
val data = readFile("input1.json") val data = readFile("completeExample.json")
val wrapper = mockk<ElasticSearchWrapper>() val wrapper = mockk<ElasticSearchWrapper>()
every { wrapper.countNumberOfDocuments("sap-021") } returns 102 every { wrapper.countNumberOfDocuments("testComplete") } returns 102
every { wrapper.getDocumentTypesFromRecords("sap-021", "recordSet.facet") } returns listOf(FacetContainer(LanguageContainer(listOf("Fotographie"), listOf("Photographie"), listOf("Fotografia"), emptyList()), null, emptyList())) every { wrapper.getDocumentTypesFromRecords("testCom