Commit c020242d authored by Jonas Waeber
Browse files

Refactor SearchDoc to DocumentsSearchDoc.

Improve logging for missing record sets & institutions in records.
parent 517da906
Pipeline #17584 passed with stages
in 9 minutes and 54 seconds
......@@ -24,10 +24,10 @@ import org.memobase.builders.*
import org.memobase.helpers.*
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.Schema
import org.memobase.model.SearchDoc
import org.memobase.model.DocumentsSearchDoc
import org.memobase.rdf.NS
class SearchDocTransform(private val mediaUrl: String) {
class DocumentsSearchDocBuilder(private val mediaUrl: String) {
private val log = LogManager.getLogger("SearchDocTransform")
fun transform(input: Map<String, JsonObject>): Schema {
......@@ -139,7 +139,6 @@ class SearchDocTransform(private val mediaUrl: String) {
log.error("Found no carrier types for record $id.")
}
val locator = try {
val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
if (value == null)
......@@ -172,11 +171,11 @@ class SearchDocTransform(private val mediaUrl: String) {
}
}
return SearchDoc(
return DocumentsSearchDoc(
title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
type = record.getOrDefault("type", "Foto") as String,
type = record.getOrDefault("type", "NoDocumentTypeDefined") as String,
sourceID = try {
Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
} catch (ex: NoSuchElementException) {
......
......@@ -29,6 +29,7 @@ import org.memobase.helpers.Default
import org.memobase.helpers.JSON
import org.memobase.helpers.KEYS
import org.memobase.helpers.ReportStatus
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.Report
import org.memobase.model.Schema
import org.memobase.settings.SettingsLoader
......@@ -39,7 +40,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
private val reportTopic = settings.processReportTopic
private val searchDocTransform = SearchDocTransform(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val searchDocTransform = DocumentsSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val institutionSearchDoc =
InstitutionSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath))
......@@ -67,7 +68,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
Report(readOnlyKey, "SUCCESS", "Transformed message into search doc.")
)
} catch (ex: InvalidInputException) {
Pair(Default.searchDoc, Report(readOnlyKey, "FAILURE", ex.localizedMessage))
Pair(DocumentsSearchDoc.DEFAULT, Report(readOnlyKey, "FAILURE", ex.localizedMessage))
}
}
......
......@@ -22,71 +22,4 @@ object Default {
"",
""
)
val searchDoc = SearchDoc(
emptyList(),
emptyList(),
emptyList(),
"",
"",
"",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
FacetContainer(LanguageContainer(emptyList(), emptyList(), emptyList(), emptyList()),"", emptyList()),
false,
emptyList(),
emptyList(),
"",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
EnrichedDigitalMetadata(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
false,
SuggestContainer(emptyList(), emptyList(), emptyList(), emptyList())
)
}
\ No newline at end of file
......@@ -10,7 +10,7 @@ import org.memobase.rdf.NS
Extraction helpers for institutions and record sets.
*/
object Meta {
private val log = LogManager.getLogger("InstitutionExtraction")
private val log = LogManager.getLogger("InstitutionAndRecordSetExtractionHelper")
fun extractInstitution(record: JsonObject): List<FacetContainer> {
return extract(KEYS.heldBy, record)
......@@ -21,20 +21,10 @@ object Meta {
return if (containers.isNotEmpty()) {
containers[0]
} else {
FacetContainer(
LanguageContainer(
listOf("Unknown RecordSet"),
listOf("Unknown RecordSet"),
listOf("Unknown RecordSet"),
listOf("Unknown RecordSet")
),
"UnknownRecordSet",
listOf("UnknownRecordSet")
)
FacetContainer.placeholder("NoRecordSetInRecord")
}
}
// TODO: Implement function to extract labels from elastic index.
private fun extract(key: String, record: JsonObject): List<FacetContainer> {
return record[key].let { items ->
when (items) {
......@@ -53,7 +43,7 @@ object Meta {
)
}
else -> {
log.error("Could not extract institution in property heldBy from record ${record[KEYS.entityId]}")
log.error("No property $key in record ${record[KEYS.entityId]}.")
emptyList()
}
}
......
......@@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonInclude
@JsonInclude(JsonInclude.Include.NON_EMPTY)
data class SearchDoc(
data class DocumentsSearchDoc(
// Titles
val title: List<LanguageContainer>,
val seriesTitle: List<LanguageContainer>,
......@@ -115,4 +115,74 @@ data class SearchDoc(
// auto complete source
val suggest: SuggestContainer
) : Schema(recordId)
) : Schema(recordId) {
companion object {
val DEFAULT = DocumentsSearchDoc(
emptyList(),
emptyList(),
emptyList(),
"NoDocumentTypeDefined",
"NoSourceIdFound",
"https://memobase.ch/record/TestIdentifier",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
FacetContainer.placeholder("NoRecordSetInRecord"),
false,
emptyList(),
emptyList(),
"",
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
EnrichedDigitalMetadata(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
emptyList(),
false,
SuggestContainer(emptyList(), emptyList(), emptyList(), emptyList())
)
}
}
......@@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonInclude
import org.memobase.helpers.KEYS
@JsonInclude(JsonInclude.Include.NON_NULL)
open class LanguageContainer(
data class LanguageContainer(
val de: List<String>,
val fr: List<String>,
val it: List<String>,
......
package org.memobase
import com.beust.klaxon.Klaxon
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.memobase.helpers.JSON
import org.memobase.model.DocumentsSearchDoc
import java.io.File
import java.nio.charset.Charset
......@@ -16,7 +18,7 @@ class TestDocumentsSearchDoc {
}
private val klaxon = Klaxon()
private val transformer = SearchDocTransform("https://media.memobase.k8s.unibas.ch/memo/")
private val transformer = DocumentsSearchDocBuilder("https://media.memobase.k8s.unibas.ch/memo/")
@Test
fun `test minimal record required`() {
......@@ -25,6 +27,9 @@ class TestDocumentsSearchDoc {
val output = transformer.transform(
mappedInput
)
println(output)
assertThat(output)
.isEqualTo(
DocumentsSearchDoc.DEFAULT
)
}
}
\ No newline at end of file
......@@ -15,7 +15,7 @@ class TestTransform {
/**
 * Checks that `transform` throws [InvalidInputException] when the input map holds only
 * an empty-string key mapped to an empty JSON object (the "missing record" case named
 * by the test).
 */
@Test
fun `test missing record`() {
// NOTE(review): the two declarations below are the before/after sides of the rename shown
// in this diff; only the DocumentsSearchDocBuilder line exists once the commit is applied.
val searchDoc = SearchDocTransform("https://media.memobase.k8s.unibas.ch/memo/")
val searchDoc = DocumentsSearchDocBuilder("https://media.memobase.k8s.unibas.ch/memo/")
// Single entry: empty key, empty JSON object.
val input = mapOf(Pair("", json { obj() }))
assertThrows<InvalidInputException> { searchDoc.transform(input) }
}
......@@ -23,7 +23,7 @@ class TestTransform {
@Test
fun `test enriched digital metadata record`() {
val searchDoc = SearchDocTransform("https://media.memobase.k8s.unibas.ch/memo/")
val searchDoc = DocumentsSearchDocBuilder("https://media.memobase.k8s.unibas.ch/memo/")
val input = JSON.unpack(
JSON.parse(
FileInputStream(File("$dataPath/enrich_digital_metadata_record.json")).bufferedReader().readLines()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment