Commit c020242d authored by Jonas Waeber
Browse files

Refactor SearchDoc to DocumentsSearchDoc.

Improve logging for missing record sets & institutions in records.
parent 517da906
Pipeline #17584 passed with stages
in 9 minutes and 54 seconds
...@@ -24,10 +24,10 @@ import org.memobase.builders.* ...@@ -24,10 +24,10 @@ import org.memobase.builders.*
import org.memobase.helpers.* import org.memobase.helpers.*
import org.memobase.model.EnrichedDigitalMetadata import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.Schema import org.memobase.model.Schema
import org.memobase.model.SearchDoc import org.memobase.model.DocumentsSearchDoc
import org.memobase.rdf.NS import org.memobase.rdf.NS
class SearchDocTransform(private val mediaUrl: String) { class DocumentsSearchDocBuilder(private val mediaUrl: String) {
private val log = LogManager.getLogger("SearchDocTransform") private val log = LogManager.getLogger("SearchDocTransform")
fun transform(input: Map<String, JsonObject>): Schema { fun transform(input: Map<String, JsonObject>): Schema {
...@@ -139,7 +139,6 @@ class SearchDocTransform(private val mediaUrl: String) { ...@@ -139,7 +139,6 @@ class SearchDocTransform(private val mediaUrl: String) {
log.error("Found no carrier types for record $id.") log.error("Found no carrier types for record $id.")
} }
val locator = try { val locator = try {
val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main) val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
if (value == null) if (value == null)
...@@ -172,11 +171,11 @@ class SearchDocTransform(private val mediaUrl: String) { ...@@ -172,11 +171,11 @@ class SearchDocTransform(private val mediaUrl: String) {
} }
} }
return SearchDoc( return DocumentsSearchDoc(
title = Extract.typedEntityByType(recordTitles, "type", "main", "title"), title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"), seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"), broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
type = record.getOrDefault("type", "Foto") as String, type = record.getOrDefault("type", "NoDocumentTypeDefined") as String,
sourceID = try { sourceID = try {
Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound" Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
} catch (ex: NoSuchElementException) { } catch (ex: NoSuchElementException) {
......
...@@ -29,6 +29,7 @@ import org.memobase.helpers.Default ...@@ -29,6 +29,7 @@ import org.memobase.helpers.Default
import org.memobase.helpers.JSON import org.memobase.helpers.JSON
import org.memobase.helpers.KEYS import org.memobase.helpers.KEYS
import org.memobase.helpers.ReportStatus import org.memobase.helpers.ReportStatus
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.Report import org.memobase.model.Report
import org.memobase.model.Schema import org.memobase.model.Schema
import org.memobase.settings.SettingsLoader import org.memobase.settings.SettingsLoader
...@@ -39,7 +40,7 @@ class KafkaTopology(private val settings: SettingsLoader) { ...@@ -39,7 +40,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
private val reportTopic = settings.processReportTopic private val reportTopic = settings.processReportTopic
private val searchDocTransform = SearchDocTransform(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl)) private val searchDocTransform = DocumentsSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val institutionSearchDoc = private val institutionSearchDoc =
InstitutionSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath)) InstitutionSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath))
...@@ -67,7 +68,7 @@ class KafkaTopology(private val settings: SettingsLoader) { ...@@ -67,7 +68,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
Report(readOnlyKey, "SUCCESS", "Transformed message into search doc.") Report(readOnlyKey, "SUCCESS", "Transformed message into search doc.")
) )
} catch (ex: InvalidInputException) { } catch (ex: InvalidInputException) {
Pair(Default.searchDoc, Report(readOnlyKey, "FAILURE", ex.localizedMessage)) Pair(DocumentsSearchDoc.DEFAULT, Report(readOnlyKey, "FAILURE", ex.localizedMessage))
} }
} }
......
...@@ -22,71 +22,4 @@ object Default { ...@@ -22,71 +22,4 @@ object Default {
"", "",
"" ""
) )
val searchDoc = SearchDoc(
emptyList(),
emptyList(),
emptyList(),
"",
"",
"",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
FacetContainer(LanguageContainer(emptyList(), emptyList(), emptyList(), emptyList()),"", emptyList()),
false,
emptyList(),
emptyList(),
"",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
EnrichedDigitalMetadata(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
false,
SuggestContainer(emptyList(), emptyList(), emptyList(), emptyList())
)
} }
\ No newline at end of file
...@@ -10,7 +10,7 @@ import org.memobase.rdf.NS ...@@ -10,7 +10,7 @@ import org.memobase.rdf.NS
Extraction helpers for institutions and record sets. Extraction helpers for institutions and record sets.
*/ */
object Meta { object Meta {
private val log = LogManager.getLogger("InstitutionExtraction") private val log = LogManager.getLogger("InstitutionAndRecordSetExtractionHelper")
fun extractInstitution(record: JsonObject): List<FacetContainer> { fun extractInstitution(record: JsonObject): List<FacetContainer> {
return extract(KEYS.heldBy, record) return extract(KEYS.heldBy, record)
...@@ -21,20 +21,10 @@ object Meta { ...@@ -21,20 +21,10 @@ object Meta {
return if (containers.isNotEmpty()) { return if (containers.isNotEmpty()) {
containers[0] containers[0]
} else { } else {
FacetContainer( FacetContainer.placeholder("NoRecordSetInRecord")
LanguageContainer(
listOf("Unknown RecordSet"),
listOf("Unknown RecordSet"),
listOf("Unknown RecordSet"),
listOf("Unknown RecordSet")
),
"UnknownRecordSet",
listOf("UnknownRecordSet")
)
} }
} }
// TODO: Implement function to extract labels from elastic index.
private fun extract(key: String, record: JsonObject): List<FacetContainer> { private fun extract(key: String, record: JsonObject): List<FacetContainer> {
return record[key].let { items -> return record[key].let { items ->
when (items) { when (items) {
...@@ -53,7 +43,7 @@ object Meta { ...@@ -53,7 +43,7 @@ object Meta {
) )
} }
else -> { else -> {
log.error("Could not extract institution in property heldBy from record ${record[KEYS.entityId]}") log.error("No property $key in record ${record[KEYS.entityId]}.")
emptyList() emptyList()
} }
} }
......
...@@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore ...@@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonInclude import com.fasterxml.jackson.annotation.JsonInclude
@JsonInclude(JsonInclude.Include.NON_EMPTY) @JsonInclude(JsonInclude.Include.NON_EMPTY)
data class SearchDoc( data class DocumentsSearchDoc(
// Titles // Titles
val title: List<LanguageContainer>, val title: List<LanguageContainer>,
val seriesTitle: List<LanguageContainer>, val seriesTitle: List<LanguageContainer>,
...@@ -115,4 +115,74 @@ data class SearchDoc( ...@@ -115,4 +115,74 @@ data class SearchDoc(
// auto complete source // auto complete source
val suggest: SuggestContainer val suggest: SuggestContainer
) : Schema(recordId) ) : Schema(recordId) {
companion object {
val DEFAULT = DocumentsSearchDoc(
emptyList(),
emptyList(),
emptyList(),
"NoDocumentTypeDefined",
"NoSourceIdFound",
"https://memobase.ch/record/TestIdentifier",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
FacetContainer.placeholder("NoRecordSetInRecord"),
false,
emptyList(),
emptyList(),
"",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
EnrichedDigitalMetadata(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
false,
SuggestContainer(emptyList(), emptyList(), emptyList(), emptyList())
)
}
}
...@@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonInclude ...@@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonInclude
import org.memobase.helpers.KEYS import org.memobase.helpers.KEYS
@JsonInclude(JsonInclude.Include.NON_NULL) @JsonInclude(JsonInclude.Include.NON_NULL)
open class LanguageContainer( data class LanguageContainer(
val de: List<String>, val de: List<String>,
val fr: List<String>, val fr: List<String>,
val it: List<String>, val it: List<String>,
......
package org.memobase package org.memobase
import com.beust.klaxon.Klaxon import com.beust.klaxon.Klaxon
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.TestInstance
import org.memobase.helpers.JSON import org.memobase.helpers.JSON
import org.memobase.model.DocumentsSearchDoc
import java.io.File import java.io.File
import java.nio.charset.Charset import java.nio.charset.Charset
...@@ -16,7 +18,7 @@ class TestDocumentsSearchDoc { ...@@ -16,7 +18,7 @@ class TestDocumentsSearchDoc {
} }
private val klaxon = Klaxon() private val klaxon = Klaxon()
private val transformer = SearchDocTransform("https://media.memobase.k8s.unibas.ch/memo/") private val transformer = DocumentsSearchDocBuilder("https://media.memobase.k8s.unibas.ch/memo/")
@Test @Test
fun `test minimal record required`() { fun `test minimal record required`() {
...@@ -25,6 +27,9 @@ class TestDocumentsSearchDoc { ...@@ -25,6 +27,9 @@ class TestDocumentsSearchDoc {
val output = transformer.transform( val output = transformer.transform(
mappedInput mappedInput
) )
println(output) assertThat(output)
.isEqualTo(
DocumentsSearchDoc.DEFAULT
)
} }
} }
\ No newline at end of file
...@@ -15,7 +15,7 @@ class TestTransform { ...@@ -15,7 +15,7 @@ class TestTransform {
@Test @Test
fun `test missing record`() { fun `test missing record`() {
val searchDoc = SearchDocTransform("https://media.memobase.k8s.unibas.ch/memo/") val searchDoc = DocumentsSearchDocBuilder("https://media.memobase.k8s.unibas.ch/memo/")
val input = mapOf(Pair("", json { obj() })) val input = mapOf(Pair("", json { obj() }))
assertThrows<InvalidInputException> { searchDoc.transform(input) } assertThrows<InvalidInputException> { searchDoc.transform(input) }
} }
...@@ -23,7 +23,7 @@ class TestTransform { ...@@ -23,7 +23,7 @@ class TestTransform {
@Test @Test
fun `test enriched digital metadata record`() { fun `test enriched digital metadata record`() {
val searchDoc = SearchDocTransform("https://media.memobase.k8s.unibas.ch/memo/") val searchDoc = DocumentsSearchDocBuilder("https://media.memobase.k8s.unibas.ch/memo/")
val input = JSON.unpack( val input = JSON.unpack(
JSON.parse( JSON.parse(
FileInputStream(File("$dataPath/enrich_digital_metadata_record.json")).bufferedReader().readLines() FileInputStream(File("$dataPath/enrich_digital_metadata_record.json")).bufferedReader().readLines()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment