Commit 17039ada authored by Jonas Waeber
Browse files

Added document count for institution search doc.

parent 103e4101
...@@ -32,6 +32,8 @@ ext { ...@@ -32,6 +32,8 @@ ext {
} }
dependencies { dependencies {
compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.6.1'
implementation 'org.memobase:memobase-service-utilities:0.14.2' implementation 'org.memobase:memobase-service-utilities:0.14.2'
// Logging Framework // Logging Framework
......
...@@ -5,6 +5,7 @@ metadata: ...@@ -5,6 +5,7 @@ metadata:
namespace: memobase namespace: memobase
data: data:
APPLICATION_ID: "{{ .Values.deploymentName }}-app" APPLICATION_ID: "{{ .Values.deploymentName }}-app"
ELASTIC_INDEX: "{{ .Values.elasticIndex }}"
MEDIA_SERVER_URL: "{{ .Values.mediaServerUrl }}" MEDIA_SERVER_URL: "{{ .Values.mediaServerUrl }}"
TOPIC_IN: "{{ .Values.inputTopic }}" TOPIC_IN: "{{ .Values.inputTopic }}"
TOPIC_OUT: "{{ .Values.outputTopic }}" TOPIC_OUT: "{{ .Values.outputTopic }}"
......
...@@ -23,6 +23,8 @@ spec: ...@@ -23,6 +23,8 @@ spec:
envFrom: envFrom:
- configMapRef: - configMapRef:
name: "{{ .Values.kafkaConfigs }}" name: "{{ .Values.kafkaConfigs }}"
- configMapRef:
name: "{{ .Values.elasticConfigs }}"
- configMapRef: - configMapRef:
name: "{{ .Values.deploymentName}}-app-config" name: "{{ .Values.deploymentName}}-app-config"
volumeMounts: volumeMounts:
......
...@@ -5,6 +5,9 @@ tag: "latest" ...@@ -5,6 +5,9 @@ tag: "latest"
deploymentName: search-doc-service deploymentName: search-doc-service
kafkaConfigs: prod-kafka-bootstrap-servers kafkaConfigs: prod-kafka-bootstrap-servers
elasticConfigs: prod-elastic-configs
elasticIndex: documents
outputTopic: search-doc-output-documents outputTopic: search-doc-output-documents
inputTopic: search-doc-input-documents inputTopic: search-doc-input-documents
reportingTopic: postprocessing-reporting reportingTopic: postprocessing-reporting
......
...@@ -20,8 +20,10 @@ package org.memobase ...@@ -20,8 +20,10 @@ package org.memobase
import com.beust.klaxon.JsonArray import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject import com.beust.klaxon.JsonObject
import java.util.Properties
import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract import org.memobase.helpers.Extract
import org.memobase.helpers.InstitutionTypeMapper import org.memobase.helpers.InstitutionTypeMapper
import org.memobase.helpers.KEYS import org.memobase.helpers.KEYS
...@@ -31,9 +33,10 @@ import org.memobase.model.LanguageContainer ...@@ -31,9 +33,10 @@ import org.memobase.model.LanguageContainer
import org.memobase.model.Schema import org.memobase.model.Schema
import org.memobase.rdf.NS import org.memobase.rdf.NS
class InstitutionSearchDocBuilder(path: String) { class InstitutionSearchDocBuilder(path: String, appSettings: Properties) {
private val log = LogManager.getLogger("InstitutionSearchDocBuilder") private val log = LogManager.getLogger("InstitutionSearchDocBuilder")
private val institutionTypeMapper = InstitutionTypeMapper(path) private val institutionTypeMapper = InstitutionTypeMapper(path)
private val elasticSearchWrapper = ElasticSearchWrapper(appSettings)
fun transform(key: String, input: Map<String, JsonObject>): Schema { fun transform(key: String, input: Map<String, JsonObject>): Schema {
val institution = val institution =
...@@ -76,6 +79,8 @@ class InstitutionSearchDocBuilder(path: String) { ...@@ -76,6 +79,8 @@ class InstitutionSearchDocBuilder(path: String) {
val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound") val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound")
val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound") val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound")
val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound" val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound"
val recordSetIds = Extract.listOfStrings(institution[KEYS.isHolderOf])
return InstitutionSearchDoc( return InstitutionSearchDoc(
institutionId = id, institutionId = id,
published = institution[KEYS.isPublished].let { published = institution[KEYS.isPublished].let {
...@@ -96,22 +101,22 @@ class InstitutionSearchDocBuilder(path: String) { ...@@ -96,22 +101,22 @@ class InstitutionSearchDocBuilder(path: String) {
city = municipalities, city = municipalities,
address = addresses, address = addresses,
postalCodes = postalCodes, postalCodes = postalCodes,
numberOfRecordSets = Extract.listOfStrings(institution[KEYS.isHolderOf]).count(), numberOfRecordSets = recordSetIds.count(),
numberOfDocuments = 0, numberOfDocuments = recordSetIds
.map { value -> elasticSearchWrapper.countNumberOfDocuments(value) }
.sum().toInt(),
teaserColor = institution[KEYS.teaserColor].let { teaserColor = institution[KEYS.teaserColor].let {
if (it == null) { if (it == null) {
log.error("No teaser colour found for institution $id.") log.error("No teaser colour found for institution $id.")
"" ""
} } else
else
it as String it as String
}, },
teaserColorComputed = institution[KEYS.teaserColorComputed].let { teaserColorComputed = institution[KEYS.teaserColorComputed].let {
if (it == null) { if (it == null) {
log.error("No computed teaser colour found for institution $id.") log.error("No computed teaser colour found for institution $id.")
"" ""
} } else
else
it as String it as String
}, },
lastUpdatedDate = Date.now lastUpdatedDate = Date.now
......
...@@ -38,11 +38,12 @@ import java.io.StringWriter ...@@ -38,11 +38,12 @@ import java.io.StringWriter
class KafkaTopology(private val settings: SettingsLoader) { class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("SearchDocService") private val log = LogManager.getLogger("SearchDocService")
private val appSettings = settings.appSettings
private val reportTopic = settings.processReportTopic private val reportTopic = settings.processReportTopic
private val searchDocTransform = DocumentsSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl)) private val searchDocTransform = DocumentsSearchDocBuilder(appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val institutionSearchDoc = private val institutionSearchDoc =
InstitutionSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath)) InstitutionSearchDocBuilder(appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath), appSettings)
private val jsonWriter = ObjectMapper().registerKotlinModule().writer() private val jsonWriter = ObjectMapper().registerKotlinModule().writer()
......
...@@ -30,7 +30,10 @@ class Service(file: String = "app.yml") { ...@@ -30,7 +30,10 @@ class Service(file: String = "app.yml") {
val settings = SettingsLoader( val settings = SettingsLoader(
listOf( listOf(
KEYS.SettingsProps.institutionTypeLabelsPath, KEYS.SettingsProps.institutionTypeLabelsPath,
KEYS.SettingsProps.mediaUrl KEYS.SettingsProps.mediaUrl,
KEYS.SettingsProps.elasticHost,
KEYS.SettingsProps.elasticPort,
KEYS.SettingsProps.elasticIndex
), ),
file, file,
useStreamsConfig = true useStreamsConfig = true
......
package org.memobase.helpers
import java.io.IOException
import java.util.Properties
import org.apache.http.HttpHost
import org.apache.logging.log4j.LogManager
import org.elasticsearch.ElasticsearchException
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.core.CountRequest
import org.elasticsearch.client.indices.GetIndexRequest
import org.elasticsearch.index.query.QueryBuilders
/**
 * Thin wrapper around the Elasticsearch high-level REST client used to count
 * the documents that belong to a record set.
 *
 * The connection is attempted once, while the instance is constructed. If the
 * configured index cannot be found or the cluster cannot be reached, the
 * wrapper stays in a disconnected state and every count is reported as zero.
 *
 * @param settings application settings; must contain [KEYS.SettingsProps.elasticHost],
 * [KEYS.SettingsProps.elasticPort] (parsed as an integer) and
 * [KEYS.SettingsProps.elasticIndex].
 */
class ElasticSearchWrapper(settings: Properties) {
    private val log = LogManager.getLogger("ElasticSearchWrapper")
    private val host = settings.getProperty(KEYS.SettingsProps.elasticHost)
    private val port = settings.getProperty(KEYS.SettingsProps.elasticPort).toInt()
    private val documentsIndex = settings.getProperty(KEYS.SettingsProps.elasticIndex)

    // Must stay below the properties above: connect() reads all of them.
    private val client = connect()

    /**
     * Builds a client for the configured host/port and verifies that the
     * configured index exists.
     *
     * @return the ready-to-use client, or `null` when the index is missing or
     * the existence check failed. In both failure cases the client is closed
     * again so its resources are not leaked.
     */
    private fun connect(): RestHighLevelClient? {
        val candidate = RestHighLevelClient(
            RestClient.builder(
                HttpHost(host, port)
            )
        )
        return try {
            if (candidate.indices().exists(GetIndexRequest(documentsIndex), RequestOptions.DEFAULT)) {
                log.info("Successfully connected to index $documentsIndex. Ready to query.")
                candidate
            } else {
                log.error("Could not find the index defined in the configuration: $documentsIndex.")
                release(candidate)
                null
            }
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage, ex)
            release(candidate)
            null
        } catch (ex: IOException) {
            // Transport-level failures (connection refused, timeout, ...) surface as
            // IOException and would otherwise abort construction of the wrapper.
            log.error("Could not reach Elasticsearch at $host:$port: ${ex.message}", ex)
            release(candidate)
            null
        }
    }

    /** Closes a client that will not be used; a failure to close is only logged. */
    private fun release(unusable: RestHighLevelClient) {
        try {
            unusable.close()
        } catch (ex: IOException) {
            log.warn("Could not close the unused Elasticsearch client: ${ex.message}", ex)
        }
    }

    /**
     * Counts the documents in the configured index whose `recordSet.facet`
     * field matches [recordSetIdentifier] (term query).
     *
     * @param recordSetIdentifier the value looked up in `recordSet.facet`.
     * @return the number of matching documents, capped at [Int.MAX_VALUE];
     * `0` when the wrapper is not connected or the request fails.
     */
    fun countNumberOfDocuments(recordSetIdentifier: String): Int {
        val connected = client
        if (connected == null) {
            log.error("Not connected to an index. Count is at zero! Restart service to retry connection.")
            return 0
        }

        val request = CountRequest(documentsIndex)
        request.query(
            QueryBuilders.termQuery(
                "recordSet.facet", recordSetIdentifier
            )
        )

        return try {
            val response = connected.count(request, RequestOptions.DEFAULT)
            // The API reports a Long; clamp instead of letting toInt() wrap around.
            response.count.coerceAtMost(Int.MAX_VALUE.toLong()).toInt()
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage, ex)
            0
        } catch (ex: IOException) {
            // Keep the best-effort contract: a transport failure yields a zero count
            // rather than failing the caller's document transformation.
            log.error("Failed to count documents for record set $recordSetIdentifier: ${ex.message}", ex)
            0
        }
    }
}
\ No newline at end of file
...@@ -22,6 +22,9 @@ object KEYS { ...@@ -22,6 +22,9 @@ object KEYS {
object SettingsProps { object SettingsProps {
const val mediaUrl = "media.url" const val mediaUrl = "media.url"
const val institutionTypeLabelsPath = "institutionTypeLabelsPath" const val institutionTypeLabelsPath = "institutionTypeLabelsPath"
const val elasticHost = "elastic.host"
const val elasticPort = "elastic.port"
const val elasticIndex = "elastic.index"
} }
const val entityId = "@id" const val entityId = "@id"
......
app: app:
elastic:
host: ${ELASTIC_HOST:?system}
port: ${ELASTIC_PORT:?system}
index: ${ELASTIC_INDEX:?system}
media: media:
url: ${MEDIA_SERVER_URL:?system} url: ${MEDIA_SERVER_URL:?system}
institutionTypeLabelsPath: "/configs/institution_types/labels.csv" institutionTypeLabelsPath: "/configs/institution_types/labels.csv"
......
app: app:
elastic:
host: localhost
port: 9200
index: documents
media: media:
url: "https://media.memobase.k8s.unibas.ch/memo/" url: "https://media.memobase.k8s.unibas.ch/memo/"
institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv" institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment