Commit 17039ada authored by Jonas Waeber
Browse files

Added document count for institution search doc.

parent 103e4101
......@@ -32,6 +32,8 @@ ext {
}
dependencies {
compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.6.1'
implementation 'org.memobase:memobase-service-utilities:0.14.2'
// Logging Framework
......
......@@ -5,6 +5,7 @@ metadata:
namespace: memobase
data:
APPLICATION_ID: "{{ .Values.deploymentName }}-app"
ELASTIC_INDEX: "{{ .Values.elasticIndex }}"
MEDIA_SERVER_URL: "{{ .Values.mediaServerUrl }}"
TOPIC_IN: "{{ .Values.inputTopic }}"
TOPIC_OUT: "{{ .Values.outputTopic }}"
......
......@@ -23,6 +23,8 @@ spec:
envFrom:
- configMapRef:
name: "{{ .Values.kafkaConfigs }}"
- configMapRef:
name: "{{ .Values.elasticConfigs }}"
- configMapRef:
name: "{{ .Values.deploymentName}}-app-config"
volumeMounts:
......
......@@ -5,6 +5,9 @@ tag: "latest"
deploymentName: search-doc-service
kafkaConfigs: prod-kafka-bootstrap-servers
elasticConfigs: prod-elastic-configs
elasticIndex: documents
outputTopic: search-doc-output-documents
inputTopic: search-doc-input-documents
reportingTopic: postprocessing-reporting
......
......@@ -20,8 +20,10 @@ package org.memobase
import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import java.util.Properties
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
import org.memobase.helpers.InstitutionTypeMapper
import org.memobase.helpers.KEYS
......@@ -31,13 +33,14 @@ import org.memobase.model.LanguageContainer
import org.memobase.model.Schema
import org.memobase.rdf.NS
class InstitutionSearchDocBuilder(path: String) {
class InstitutionSearchDocBuilder(path: String, appSettings: Properties) {
private val log = LogManager.getLogger("InstitutionSearchDocBuilder")
private val institutionTypeMapper = InstitutionTypeMapper(path)
private val elasticSearchWrapper = ElasticSearchWrapper(appSettings)
fun transform(key: String, input: Map<String, JsonObject>): Schema {
val institution =
input["institution"] ?: throw InvalidInputException("No institution entity found in message $key.")
input["institution"] ?: throw InvalidInputException("No institution entity found in message $key.")
val identifiers = mutableListOf<JsonObject>()
val cantons = mutableListOf<FacetContainer>()
val municipalities = mutableListOf<LanguageContainer>()
......@@ -76,45 +79,47 @@ class InstitutionSearchDocBuilder(path: String) {
val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound")
val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound")
val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound"
val recordSetIds = Extract.listOfStrings(institution[KEYS.isHolderOf])
return InstitutionSearchDoc(
institutionId = id,
published = institution[KEYS.isPublished].let {
when (it) {
is String -> it.toBoolean()
else -> {
log.error("Found no isPublished property on institution $key. Set to false.")
false
institutionId = id,
published = institution[KEYS.isPublished].let {
when (it) {
is String -> it.toBoolean()
else -> {
log.error("Found no isPublished property on institution $key. Set to false.")
false
}
}
}
},
type = type,
name = name,
description = description,
documentType = listOf(FacetContainer.placeholder("PLACEHOLDER")),
keyVisualLink = institution[KEYS.wikidataImage].let { if (it != null) it as String else "NoKeyVisualLinkDefined" },
canton = cantons,
city = municipalities,
address = addresses,
postalCodes = postalCodes,
numberOfRecordSets = Extract.listOfStrings(institution[KEYS.isHolderOf]).count(),
numberOfDocuments = 0,
teaserColor = institution[KEYS.teaserColor].let {
if (it == null) {
log.error("No teaser colour found for institution $id.")
""
}
else
it as String
},
teaserColorComputed = institution[KEYS.teaserColorComputed].let {
if (it == null) {
log.error("No computed teaser colour found for institution $id.")
""
}
else
it as String
},
lastUpdatedDate = Date.now
},
type = type,
name = name,
description = description,
documentType = listOf(FacetContainer.placeholder("PLACEHOLDER")),
keyVisualLink = institution[KEYS.wikidataImage].let { if (it != null) it as String else "NoKeyVisualLinkDefined" },
canton = cantons,
city = municipalities,
address = addresses,
postalCodes = postalCodes,
numberOfRecordSets = recordSetIds.count(),
numberOfDocuments = recordSetIds
.map { value -> elasticSearchWrapper.countNumberOfDocuments(value) }
.sum().toInt(),
teaserColor = institution[KEYS.teaserColor].let {
if (it == null) {
log.error("No teaser colour found for institution $id.")
""
} else
it as String
},
teaserColorComputed = institution[KEYS.teaserColorComputed].let {
if (it == null) {
log.error("No computed teaser colour found for institution $id.")
""
} else
it as String
},
lastUpdatedDate = Date.now
)
}
......
......@@ -38,11 +38,12 @@ import java.io.StringWriter
class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("SearchDocService")
private val appSettings = settings.appSettings
private val reportTopic = settings.processReportTopic
private val searchDocTransform = DocumentsSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val searchDocTransform = DocumentsSearchDocBuilder(appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val institutionSearchDoc =
InstitutionSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath))
InstitutionSearchDocBuilder(appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath), appSettings)
private val jsonWriter = ObjectMapper().registerKotlinModule().writer()
......
......@@ -28,12 +28,15 @@ class Service(file: String = "app.yml") {
private val log = LogManager.getLogger("SearchDocService")
val settings = SettingsLoader(
listOf(
KEYS.SettingsProps.institutionTypeLabelsPath,
KEYS.SettingsProps.mediaUrl
),
file,
useStreamsConfig = true
listOf(
KEYS.SettingsProps.institutionTypeLabelsPath,
KEYS.SettingsProps.mediaUrl,
KEYS.SettingsProps.elasticHost,
KEYS.SettingsProps.elasticPort,
KEYS.SettingsProps.elasticIndex
),
file,
useStreamsConfig = true
)
val topology = KafkaTopology(settings).build()
......
package org.memobase.helpers
import java.io.IOException
import java.util.Properties
import org.apache.http.HttpHost
import org.apache.logging.log4j.LogManager
import org.elasticsearch.ElasticsearchException
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.core.CountRequest
import org.elasticsearch.client.indices.GetIndexRequest
import org.elasticsearch.index.query.QueryBuilders
/**
 * Best-effort wrapper around the Elasticsearch high-level REST client that
 * counts the documents of a record set in the configured documents index.
 *
 * The connection is attempted once, when the wrapper is constructed. If the
 * index cannot be verified (index missing, Elasticsearch error, or the host is
 * unreachable) the wrapper stays in a disconnected state and every count is
 * reported as zero until the service is restarted.
 *
 * @param settings application properties; must contain the keys
 * [KEYS.SettingsProps.elasticHost], [KEYS.SettingsProps.elasticPort] (parsed
 * with `toInt()`) and [KEYS.SettingsProps.elasticIndex].
 */
class ElasticSearchWrapper(settings: Properties) {
    private val log = LogManager.getLogger("ElasticSearchWrapper")
    private val host = settings.getProperty(KEYS.SettingsProps.elasticHost)
    private val port = settings.getProperty(KEYS.SettingsProps.elasticPort).toInt()
    private val documentsIndex = settings.getProperty(KEYS.SettingsProps.elasticIndex)

    // Null when the index could not be verified at start-up.
    private val client = connect()

    /**
     * Creates the REST client and verifies that the configured index exists.
     *
     * @return the ready-to-use client, or `null` if the index is missing or
     * the existence check failed.
     */
    private fun connect(): RestHighLevelClient? {
        // Building the client performs no network I/O; the first request is the existence check below.
        val candidate = RestHighLevelClient(RestClient.builder(HttpHost(host, port)))
        return try {
            if (candidate.indices().exists(GetIndexRequest(documentsIndex), RequestOptions.DEFAULT)) {
                log.info("Successfully connected to index $documentsIndex. Ready to query.")
                candidate
            } else {
                log.error("Could not find the index defined in the configuration: $documentsIndex.")
                // The client is unusable from here on; release its resources.
                release(candidate)
                null
            }
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage)
            release(candidate)
            null
        } catch (ex: IOException) {
            // Transport failures (e.g. connection refused) surface as IOException,
            // not ElasticsearchException; treat them like any other failed connect.
            log.error("Could not reach Elasticsearch at $host:$port: ${ex.message}")
            release(candidate)
            null
        }
    }

    /** Closes a client that will not be used, logging instead of propagating a close failure. */
    private fun release(unused: RestHighLevelClient) {
        try {
            unused.close()
        } catch (ex: IOException) {
            log.warn("Failed to close the Elasticsearch client: ${ex.message}")
        }
    }

    /**
     * Counts the documents whose `recordSet.facet` field matches the given identifier.
     *
     * @param recordSetIdentifier value matched with a term query on `recordSet.facet`.
     * @return the number of matching documents, capped at [Int.MAX_VALUE]; `0` if the
     * wrapper is not connected or the request fails.
     */
    fun countNumberOfDocuments(recordSetIdentifier: String): Int {
        val activeClient = client
        if (activeClient == null) {
            log.error("Not connected to an index. Count is at zero! Restart service to retry connection.")
            return 0
        }

        val request = CountRequest(documentsIndex)
        request.query(QueryBuilders.termQuery("recordSet.facet", recordSetIdentifier))

        return try {
            val response = activeClient.count(request, RequestOptions.DEFAULT)
            // The count is a Long; clamp instead of silently wrapping around on overflow.
            response.count.coerceAtMost(Int.MAX_VALUE.toLong()).toInt()
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage)
            0
        } catch (ex: IOException) {
            // Keep the best-effort contract on transport failures as well.
            log.error("Count request for record set $recordSetIdentifier failed: ${ex.message}")
            0
        }
    }
}
\ No newline at end of file
......@@ -22,6 +22,9 @@ object KEYS {
// Property names used to look up values in the application settings.
// The dotted names mirror the nesting below the `app:` node of the YAML configuration.
object SettingsProps {
// Passed to DocumentsSearchDocBuilder by KafkaTopology.
const val mediaUrl = "media.url"
// Passed to InstitutionSearchDocBuilder, which hands it to InstitutionTypeMapper.
const val institutionTypeLabelsPath = "institutionTypeLabelsPath"
// Host used by ElasticSearchWrapper to build its REST client.
const val elasticHost = "elastic.host"
// Port used by ElasticSearchWrapper; the property value is parsed with toInt().
const val elasticPort = "elastic.port"
// Index that ElasticSearchWrapper checks for existence and runs count requests against.
const val elasticIndex = "elastic.index"
}
const val entityId = "@id"
......
app:
elastic:
host: ${ELASTIC_HOST:?system}
port: ${ELASTIC_PORT:?system}
index: ${ELASTIC_INDEX:?system}
media:
url: ${MEDIA_SERVER_URL:?system}
institutionTypeLabelsPath: "/configs/institution_types/labels.csv"
......
app:
elastic:
host: localhost
port: 9200
index: documents
media:
url: "https://media.memobase.k8s.unibas.ch/memo/"
institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment