Commit 58eca486 authored by Jonas Waeber
Browse files

Merge branch 'update-date-sort-and-facet'

# Conflicts:
#	src/main/kotlin/ch/memobase/helpers/DateFacetBuildHelpers.kt
parents 570c0fc9 3ec4a536
deploymentName: di-es-transformer-prod
k8sName: es-transformer
k8sGroupId: di
k8sGroupName: documents-import
outputTopic: mb-di-frontend-es-documents-prod
inputTopic: mb-di-processed-records-prod
reportingTopic: mb-di-reporting-prod
......@@ -10,6 +13,8 @@ documentTypeLabels: prod-document-type-labels
accessTermLabels: prod-access-term-labels
reuseStatementLabels: prod-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: prod-documents-index
institutionIndexSource: prod-institutions-index
recordSetIndexSource: prod-record-sets-index
......
deploymentName: di-es-transformer-stage
k8sName: es-transformer
k8sGroupId: di
k8sGroupName: documents-import
outputTopic: mb-di-frontend-es-documents-stage
inputTopic: mb-di-processed-records-stage
reportingTopic: mb-di-reporting-stage
......@@ -10,6 +13,8 @@ documentTypeLabels: stage-document-type-labels
accessTermLabels: stage-access-term-labels
reuseStatementLabels: stage-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: stage-documents-index
institutionIndexSource: stage-institutions-index
recordSetIndexSource: stage-record-sets-index
......
deploymentName: di-es-transformer-test
k8sName: es-transformer
k8sGroupId: di
k8sGroupName: documents-import
outputTopic: mb-di-frontend-es-documents-prod
inputTopic: mb-di-processed-records-prod
reportingTopic: mb-di-reporting-prod
......@@ -10,6 +13,8 @@ documentTypeLabels: test-document-type-labels
accessTermLabels: test-access-term-labels
reuseStatementLabels: test-reuse-statement-labels
applicationIdVersion: v4 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: test-documents-index
institutionIndexSource: test-institutions-index
recordSetIndexSource: test-record-sets-index
......
deploymentName: gi-es-institutions-transformer-prod
k8sName: es-institutions-transformer
k8sGroupId: gi
k8sGroupName: group-import
outputTopic: mb-gi-frontend-es-institutions-prod
inputTopic: mb-gi-processed-institutions-prod
reportingTopic: mb-di-reporting-prod
......@@ -11,6 +14,8 @@ documentTypeLabels: prod-document-type-labels
accessTermLabels: prod-access-term-labels
reuseStatementLabels: prod-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: prod-documents-index
institutionIndexSource: prod-institutions-index
recordSetIndexSource: prod-record-sets-index
......
deploymentName: gi-es-institutions-transformer-stage
k8sName: es-institutions-transformer
k8sGroupId: gi
k8sGroupName: group-import
outputTopic: mb-gi-frontend-es-institutions-stage
inputTopic: mb-gi-processed-institutions-stage
reportingTopic: mb-di-reporting-stage
......@@ -11,6 +14,8 @@ documentTypeLabels: stage-document-type-labels
accessTermLabels: stage-access-term-labels
reuseStatementLabels: stage-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: stage-documents-index
institutionIndexSource: stage-institutions-index
recordSetIndexSource: stage-record-sets-index
......
deploymentName: gi-es-institutions-transformer-test
k8sName: es-institutions-transformer
k8sGroupId: gi
k8sGroupName: group-import
outputTopic: mb-gi-frontend-es-institutions-prod
inputTopic: mb-gi-processed-institutions-prod
reportingTopic: mb-di-reporting-prod
......@@ -11,6 +14,8 @@ documentTypeLabels: test-document-type-labels
accessTermLabels: test-access-term-labels
reuseStatementLabels: test-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: test-documents-index
institutionIndexSource: test-institutions-index
recordSetIndexSource: test-record-sets-index
......
deploymentName: gi-es-record-sets-transformer-prod
k8sName: es-record-sets-transformer
k8sGroupId: gi
k8sGroupName: group-import
outputTopic: mb-gi-frontend-es-record-sets-prod
inputTopic: mb-gi-processed-record-sets-prod
reportingTopic: mb-di-reporting-prod
......@@ -11,6 +14,8 @@ documentTypeLabels: prod-document-type-labels
accessTermLabels: prod-access-term-labels
reuseStatementLabels: prod-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: prod-documents-index
institutionIndexSource: prod-institutions-index
recordSetIndexSource: prod-record-sets-index
......
deploymentName: gi-es-record-sets-transformer-stage
k8sName: es-record-sets-transformer
k8sGroupId: gi
k8sGroupName: group-import
outputTopic: mb-gi-frontend-es-record-sets-stage
inputTopic: mb-gi-processed-record-sets-stage
reportingTopic: mb-di-reporting-stage
......@@ -11,6 +14,8 @@ documentTypeLabels: stage-document-type-labels
accessTermLabels: stage-access-term-labels
reuseStatementLabels: stage-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: stage-documents-index
institutionIndexSource: stage-institutions-index
recordSetIndexSource: stage-record-sets-index
......
deploymentName: gi-es-record-sets-transformer-test
k8sName: es-record-sets-transformer
k8sGroupId: gi
k8sGroupName: group-import
outputTopic: mb-gi-frontend-es-record-sets-prod
inputTopic: mb-gi-processed-record-sets-prod
reportingTopic: mb-di-reporting-prod
......@@ -11,6 +14,8 @@ documentTypeLabels: test-document-type-labels
accessTermLabels: test-access-term-labels
reuseStatementLabels: test-reuse-statement-labels
applicationIdVersion: v1 # change this variable when the data from the kafka topic should be fully re-imported.
documentsIndexSource: test-documents-index
institutionIndexSource: test-institutions-index
recordSetIndexSource: test-record-sets-index
......
apiVersion: v1
kind: ConfigMap
metadata:
name: "{{ .Values.deploymentName }}-app-config"
name: "{{ .Values.k8sGroupId }}-{{ .Values.k8sName }}-{{ .Values.k8sEnvironment }}-app-config"
namespace: memobase
labels:
app: "{{ .Values.k8sName }}"
environment: {{ .Values.k8sEnvironment }}
group: "{{ .Values.k8sGroupName }}"
data:
APPLICATION_ID: "{{ .Values.deploymentName }}-app"
APPLICATION_ID: "{{ .Values.k8sGroupId }}-{{ .Values.k8sName }}-{{ .Values.k8sEnvironment }}-{{.Values.applicationIdVersion }}-app"
MEDIA_SERVER_URL: "{{ .Values.mediaServerUrl }}"
UPDATE_TOPIC: "{{ .Values.updateTopic }}"
TOPIC_IN: "{{ .Values.inputTopic }}"
......
apiVersion: apps/v1
kind: Deployment
metadata:
name: "{{ .Values.deploymentName }}-deployment"
name: "{{ .Values.k8sGroupId }}-{{ .Values.k8sName }}-{{ .Values.k8sEnvironment }}-deployment"
namespace: memobase
labels:
app: "{{ .Values.deploymentName }}"
app: "{{ .Values.k8sName }}"
environment: {{ .Values.k8sEnvironment }}
group: "documents-import"
group: "{{ .Values.k8sGroupName }}"
spec:
selector:
matchLabels:
app: "{{ .Values.deploymentName }}"
app: "{{ .Values.k8sName }}"
replicas: {{ .Values.k8sReplicas }}
template:
metadata:
labels:
app: "{{ .Values.deploymentName }}"
app: "{{ .Values.k8sName }}"
environment: {{ .Values.k8sEnvironment }}
group: "documents-import"
group: "{{ .Values.k8sGroupName }}"
spec:
containers:
- name: "{{ .Values.deploymentName }}-container"
- name: "{{ .Values.k8sGroupId }}-{{ .Values.k8sName }}-{{ .Values.k8sEnvironment }}-container"
image: "{{.Values.registry}}/{{ .Values.image }}:{{ .Values.tag }}"
imagePullPolicy: Always
resources:
......@@ -52,7 +52,7 @@ spec:
- configMapRef:
name: "{{ .Values.elasticConfigs }}"
- configMapRef:
name: "{{ .Values.deploymentName}}-app-config"
name: "{{ .Values.k8sGroupId }}-{{ .Values.k8sName }}-{{ .Values.k8sEnvironment }}-app-config"
volumeMounts:
- name: instituion-type-labels
mountPath: "/configs/institution_types/"
......
......@@ -3,6 +3,10 @@ registry: "cr.gitlab.switch.ch"
image: "memoriav/memobase-2020/services/elastic-services/search-doc-service"
tag: "latest"
k8sName: placeholder
k8sGroupId: placeholder
k8sGroupName: placeholder
k8sEnvironment: placeholder
k8sReplicas: 1
k8sRequestsCpu: "0.1"
......@@ -10,7 +14,6 @@ k8sRequestsMemory: "128Mi"
k8sLimitsCpu: "0.5"
k8sLimitsMemory: "512Mi"
deploymentName: placeholder
kafkaConfigs: placeholder
elasticConfigs: placeholder
......@@ -20,6 +23,7 @@ recordSetIndexSource: placeholder
outputTopic: placeholder
inputTopic: placeholder
reportingTopic: placeholder
applicationIdVersion: placeholder # change this variable when the data from the kafka topic should be fully re-imported.
instutionTypeLabels: placeholder
documentTypeLabels: placeholder
......
......@@ -18,7 +18,7 @@
package ch.memobase
import ch.memobase.builders.AgentContainerBuilder
import ch.memobase.builders.DateContainerBuilder
import ch.memobase.builders.DateSearchFieldBuilder
import ch.memobase.builders.EnrichedFacetContainerBuilder
import ch.memobase.builders.FacettedContainerBuilder
import ch.memobase.builders.IFieldBuilder
......@@ -106,9 +106,9 @@ class DocumentsSearchDocBuilder(
val placeFacetBuilder = PlaceFacetBuilder()
val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
val temporalBuilder = DateContainerBuilder(temporalIds)
val dateCreatedBuilder = DateSearchFieldBuilder(dateCreatedIds, "created")
val dateIssuedBuilder = DateSearchFieldBuilder(dateIssuedIds, "issued")
val temporalBuilder = DateSearchFieldBuilder(temporalIds, "temporal")
val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)
......@@ -265,9 +265,10 @@ class DocumentsSearchDocBuilder(
),
relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
source = Extract.languageContainer("source (record id: $key)", record["source"]),
temporal = temporalBuilder.build(),
dateCreated = dateCreatedBuilder.build(),
dateIssued = dateIssuedBuilder.build(),
temporal = temporalBuilder.build().first,
dateCreated = dateCreatedBuilder.build().first,
dateIssued = dateIssuedBuilder.build().first,
dateFacetField = dateCreatedBuilder.build().second + dateIssuedBuilder.build().second,
placeCapture = placeCapturedBuilder.build(),
placeRelated = placesRelatedBuilder.build(),
placeFacet = placeFacetBuilder.build(),
......
......@@ -17,18 +17,21 @@
*/
package ch.memobase.builders
import ch.memobase.helpers.Constants
import ch.memobase.helpers.DateFacetBuildHelpers
import ch.memobase.model.DateFacetField
import ch.memobase.model.DateSearchField
import ch.memobase.model.LanguageContainer
import ch.memobase.rdf.NS
import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import ch.memobase.helpers.DateFacetBuildHelpers
import ch.memobase.helpers.Constants
import ch.memobase.model.DateContainer
class DateContainerBuilder(private val containedIds: List<String>) : IFieldBuilder {
class DateSearchFieldBuilder(private val containedIds: List<String>, private val type: String) : IFieldBuilder {
private val log = LogManager.getLogger("DateContainerBuilder")
private val dateContainers = mutableListOf<DateContainer>()
private val dateSearchFields = mutableListOf<DateSearchField>()
private val dateFacetFields = mutableListOf<DateFacetField>()
override fun filter(jsonObject: JsonObject): Boolean {
return if (containedIds.contains(jsonObject[Constants.entityId]))
......@@ -59,15 +62,55 @@ class DateContainerBuilder(private val containedIds: List<String>) : IFieldBuild
is JsonArray<*> -> value.mapNotNull { it as String? }
else -> emptyList()
}
val facetList = when (jsonObject["@type"] as String) {
val facetLists = listOf("de", "fr", "it").map {
collectFacetStrings(
jsonObject,
isNormalized, date, key, it
)
}
dateSearchFields.add(
DateSearchField(
date = date,
qualifier = qualifier,
certainty = certainty
)
)
dateFacetFields.add(
DateFacetField(
display = date,
type = type,
sort = sort,
facet = LanguageContainer(
de = facetLists[0],
fr = facetLists[1],
it = facetLists[2]
)
)
)
return "Transformed date to date container."
}
/**
 * Hands out everything collected so far: the search fields as the first component of the
 * pair and the facet fields as the second component.
 */
override fun build(): Pair<List<DateSearchField>, List<DateFacetField>> =
    dateSearchFields to dateFacetFields
private fun collectFacetStrings(
jsonObject: JsonObject,
isNormalized: Boolean,
date: String,
key: String,
language: String
): List<String> {
return when (jsonObject["@type"] as String) {
NS.rico + "SingleDate" ->
if (isNormalized)
DateFacetBuildHelpers.buildFromNormalizedSingleDate(date)
DateFacetBuildHelpers.buildFromNormalizedSingleDate(date, language)
else emptyList()
NS.rico + "DateRange" ->
if (isNormalized) {
try {
DateFacetBuildHelpers.buildFromNormalizedDateRange(date)
DateFacetBuildHelpers.buildFromNormalizedDateRange(date, language)
} catch (ex: NumberFormatException) {
log.error("Could not parse normalized date $date in resource $key.")
emptyList<String>()
......@@ -77,19 +120,5 @@ class DateContainerBuilder(private val containedIds: List<String>) : IFieldBuild
}
else -> emptyList()
}
dateContainers.add(
DateContainer(
date = date,
sort = sort,
qualifier = qualifier,
certainty = certainty,
facet = facetList
)
)
return "Transformed date to date container."
}
override fun build(): List<DateContainer> {
return dateContainers
}
}
\ No newline at end of file
......@@ -27,7 +27,11 @@ object DateFacetBuildHelpers {
private const val level_1 = "0"
private const val level_2 = "1"
private const val centuryName = "Jahrhundert"
// Word for "century" in each supported facet language, keyed by language code.
private val centuryNames = mapOf(
    "de" to "Jahrhundert",
    "fr" to "siècle",
    "it" to "secolo"
)
/**
* Builds the hierarchical facet for a normalized SingleDate date.
......@@ -36,8 +40,8 @@ object DateFacetBuildHelpers {
*
* @return The facet values to construct the hierarchy with century and decade.
*/
fun buildFromNormalizedSingleDate(date: String): List<String> {
val century = getCentury(date.substring(0, 4))
fun buildFromNormalizedSingleDate(date: String, language: String): List<String> {
val century = getCentury(date.substring(0, 4), language)
val decade = getDecade(date.substring(0, 4))
return listOf(
"$level_1$separator$century$separator",
......@@ -54,7 +58,7 @@ object DateFacetBuildHelpers {
*
* @return The facet values used by outermedia.
*/
fun buildFromNormalizedDateRange(date: String): List<String> {
fun buildFromNormalizedDateRange(date: String, language: String): List<String> {
if (date.length <= 3) {
log.error("Normalized date range has less than 4 characters: $date.")
return emptyList()
......@@ -77,17 +81,17 @@ object DateFacetBuildHelpers {
}
return if (until.isEmpty()) {
val century = getCentury(from)
val century = getCentury(from, language)
val decade = getDecade(from)
listOf(
"$level_1$separator$century$separator",
"$level_2$separator$century$separator$decade$separator"
)
} else {
val fromCentury = getCentury(from)
val fromCentury = getCentury(from, language)
val fromDecade = getDecade(from)
val untilCentury = getCentury(until)
val untilCentury = getCentury(until, language)
val untilDecade = getDecade(until)
if (fromCentury == untilCentury) {
......@@ -131,7 +135,7 @@ object DateFacetBuildHelpers {
// first adds all the centuries required
while (fromCenturyAsInt < untilCenturyAsInt) {
results.add("$level_1$separator${getCentury(fromCenturyAsInt)}$separator")
results.add("$level_1$separator${getCentury(fromCenturyAsInt, language)}$separator")
fromCenturyAsInt += 1
}
// then add the original decade.
......@@ -142,7 +146,7 @@ object DateFacetBuildHelpers {
// then add all the decades until the last decade is reached.
while (fromDecadeAsInt < untilDecadeAsInt) {
fromDecadeAsInt += 10
results.add("$level_2$separator${getCentury(fromDecadeAsInt / 100)}" +
results.add("$level_2$separator${getCentury(fromDecadeAsInt / 100, language)}" +
"$separator${getDecade(fromDecadeAsInt)}$separator")
}
// unsorted output.
......@@ -151,12 +155,12 @@ object DateFacetBuildHelpers {
}
}
private fun getCentury(year: String): String {
return getCentury(year.substring(0, 2).toInt())
private fun getCentury(year: String, language: String): String {
return getCentury(year.substring(0, 2).toInt(), language)
}
private fun getCentury(century: Int): String {
return "${century + 1}.$centuryName"
/**
 * Builds the localized century label used inside the date facet values.
 *
 * The label has the form "<century + 1>. <century name>", e.g. the value 19 with language
 * "de" yields "20. Jahrhundert".
 *
 * @param century The leading digits of a year (the year-based overload passes the first two
 * characters of the year converted to an integer).
 * @param language Language code used to look up the century name in [centuryNames].
 * @return The localized century label.
 * @throws IllegalArgumentException if no century name is registered for [language]. Without
 * this check the lookup result `null` would be interpolated and a label such as "20. null"
 * would end up in the facet.
 */
private fun getCentury(century: Int, language: String): String {
    val localizedName = requireNotNull(centuryNames[language]) {
        "No century name configured for language '$language'."
    }
    return "${century + 1}. $localizedName"
}
private fun getDecade(year: String): String {
......
package ch.memobase.model
/**
 * Facet entry for one date of a document.
 *
 * Instances are created by DateSearchFieldBuilder, one per transformed date resource, next to
 * the matching DateSearchField.
 *
 * @property display The date string of the resource; the builder passes the same value it
 * stores as `date` in the search field.
 * @property sort Optional sort value supplied by the builder.
 * NOTE(review): presumably a sortable representation of the date; confirm against the builder.
 * @property type Name of the date field this entry originates from. The builders in
 * DocumentsSearchDocBuilder are configured with "created", "issued" and "temporal".
 * @property facet Facet values per language; the builder fills the `de`, `fr` and `it`
 * slots of the container.
 */
data class DateFacetField(
val display: String,
val sort: String?,
val type: String,
val facet: LanguageContainer
)
......@@ -20,10 +20,8 @@ package ch.memobase.model
import com.fasterxml.jackson.annotation.JsonInclude
@JsonInclude(JsonInclude.Include.NON_EMPTY)
data class DateContainer(
data class DateSearchField(
val date: String,
val sort: String?,
val certainty: List<String>,
val qualifier: List<String>,
val facet: List<String>
val qualifier: List<String>
)
......@@ -74,9 +74,10 @@ data class DocumentsSearchDoc(
val placeFacet: SimpleFacetContainer,
// Dates
val temporal: List<DateContainer>,
val dateCreated: List<DateContainer>,
val dateIssued: List<DateContainer>,
val temporal: List<DateSearchField>,
val dateCreated: List<DateSearchField>,
val dateIssued: List<DateSearchField>,
val dateFacetField: List<DateFacetField>,
// Relations
val institution: List<FacetContainer>,
......@@ -173,6 +174,7 @@ data class DocumentsSearchDoc(
emptyList(),
emptyList(),
emptyList(),
emptyList(),
FacetContainer.EMPTY,
false,
emptyList(),
......
......@@ -21,6 +21,7 @@ import ch.memobase.helpers.ElasticSearchWrapper
import ch.memobase.helpers.JsonUtility
import ch.memobase.model.DocumentsSearchDoc
import ch.memobase.model.LanguageContainer
import com.beust.klaxon.JsonObject
import io.mockk.every
import io.mockk.mockk
import java.io.File
......@@ -44,7 +45,6 @@ class TestDocumentsSearchDoc {
every { internal.getExtraInstitutionsFromRecordSet("", "originalInstitution") } returns emptyList()
every { internal.getExtraInstitutionsFromRecordSet("", "masterInstitution") } returns emptyList()
every { internal.getInstitutionNamesFromRecordSet("") } returns emptyList()
internal
}
......@@ -68,4 +68,46 @@ class TestDocumentsSearchDoc {
DocumentsSearchDoc.DEFAULT.toJson()
)
}
/**
 * Transforms a record that references one created and one issued date range and compares the
 * serialized search document with the stored expectation file.
 */
@Test
fun `test date facet search and facet fields`() {
    // Local factory for the blank-node date range resources referenced by the record.
    fun dateRange(id: String, normalizedValue: String) = JsonObject(
        mapOf(
            "@id" to id,
            "@type" to "https://www.ica.org/standards/RiC/ontology#DateRange",
            "normalizedDateValue" to normalizedValue
        )
    )

    val record = JsonObject(
        mapOf(
            "@id" to "https://memobase.ch/record/tes-001-test",
            "@type" to "https://www.ica.org/standards/RiC/ontology#Record",
            "created" to "_:b1",
            "issued" to "_:b2"
        )
    )
    val input = mapOf(
        "record" to record,
        "_:b1" to dateRange("_:b1", "1921/2001"),
        "_:b2" to dateRange("_:b2", "1910/2014")
    )

    val output = transformer.transform("test", input)

    val actualJson = output.toJson()
    val expectedJson =
        JsonUtility.parse(readFile("test_data_facet_search_and_facet_fields.json")).toJsonString()
    assertThat(actualJson).isEqualTo(expectedJson)
}
}
\ No newline at end of file
......@@ -141,14 +141,59 @@ class TestFacetBuilders {
)
}