To mitigate brute-force attacks against GitLab accounts, we are moving to edu-ID-only logins. Please remember to link your account with your edu-ID. After November 30, 2021, login will be possible only via edu-ID. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one by following this guide.

Kind regards

This Server has been upgraded to GitLab release 14.2.6

Commit 17039ada authored by Jonas Waeber's avatar Jonas Waeber
Browse files

Added document count for institution search doc.

parent 103e4101
......@@ -32,6 +32,8 @@ ext {
}
dependencies {
compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '7.6.1'
implementation 'org.memobase:memobase-service-utilities:0.14.2'
// Logging Framework
......
......@@ -5,6 +5,7 @@ metadata:
namespace: memobase
data:
APPLICATION_ID: "{{ .Values.deploymentName }}-app"
ELASTIC_INDEX: "{{ .Values.elasticIndex }}"
MEDIA_SERVER_URL: "{{ .Values.mediaServerUrl }}"
TOPIC_IN: "{{ .Values.inputTopic }}"
TOPIC_OUT: "{{ .Values.outputTopic }}"
......
......@@ -23,6 +23,8 @@ spec:
envFrom:
- configMapRef:
name: "{{ .Values.kafkaConfigs }}"
- configMapRef:
name: "{{ .Values.elasticConfigs }}"
- configMapRef:
name: "{{ .Values.deploymentName}}-app-config"
volumeMounts:
......
......@@ -5,6 +5,9 @@ tag: "latest"
deploymentName: search-doc-service
kafkaConfigs: prod-kafka-bootstrap-servers
elasticConfigs: prod-elastic-configs
elasticIndex: documents
outputTopic: search-doc-output-documents
inputTopic: search-doc-input-documents
reportingTopic: postprocessing-reporting
......
......@@ -20,8 +20,10 @@ package org.memobase
import com.beust.klaxon.JsonArray
import com.beust.klaxon.JsonObject
import java.util.Properties
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
import org.memobase.helpers.InstitutionTypeMapper
import org.memobase.helpers.KEYS
......@@ -31,13 +33,14 @@ import org.memobase.model.LanguageContainer
import org.memobase.model.Schema
import org.memobase.rdf.NS
class InstitutionSearchDocBuilder(path: String) {
class InstitutionSearchDocBuilder(path: String, appSettings: Properties) {
private val log = LogManager.getLogger("InstitutionSearchDocBuilder")
private val institutionTypeMapper = InstitutionTypeMapper(path)
private val elasticSearchWrapper = ElasticSearchWrapper(appSettings)
fun transform(key: String, input: Map<String, JsonObject>): Schema {
val institution =
input["institution"] ?: throw InvalidInputException("No institution entity found in message $key.")
input["institution"] ?: throw InvalidInputException("No institution entity found in message $key.")
val identifiers = mutableListOf<JsonObject>()
val cantons = mutableListOf<FacetContainer>()
val municipalities = mutableListOf<LanguageContainer>()
......@@ -76,45 +79,47 @@ class InstitutionSearchDocBuilder(path: String) {
val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound")
val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound")
val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound"
val recordSetIds = Extract.listOfStrings(institution[KEYS.isHolderOf])
return InstitutionSearchDoc(
institutionId = id,
published = institution[KEYS.isPublished].let {
when (it) {
is String -> it.toBoolean()
else -> {
log.error("Found no isPublished property on institution $key. Set to false.")
false
institutionId = id,
published = institution[KEYS.isPublished].let {
when (it) {
is String -> it.toBoolean()
else -> {
log.error("Found no isPublished property on institution $key. Set to false.")
false
}
}
}
},
type = type,
name = name,
description = description,
documentType = listOf(FacetContainer.placeholder("PLACEHOLDER")),
keyVisualLink = institution[KEYS.wikidataImage].let { if (it != null) it as String else "NoKeyVisualLinkDefined" },
canton = cantons,
city = municipalities,
address = addresses,
postalCodes = postalCodes,
numberOfRecordSets = Extract.listOfStrings(institution[KEYS.isHolderOf]).count(),
numberOfDocuments = 0,
teaserColor = institution[KEYS.teaserColor].let {
if (it == null) {
log.error("No teaser colour found for institution $id.")
""
}
else
it as String
},
teaserColorComputed = institution[KEYS.teaserColorComputed].let {
if (it == null) {
log.error("No computed teaser colour found for institution $id.")
""
}
else
it as String
},
lastUpdatedDate = Date.now
},
type = type,
name = name,
description = description,
documentType = listOf(FacetContainer.placeholder("PLACEHOLDER")),
keyVisualLink = institution[KEYS.wikidataImage].let { if (it != null) it as String else "NoKeyVisualLinkDefined" },
canton = cantons,
city = municipalities,
address = addresses,
postalCodes = postalCodes,
numberOfRecordSets = recordSetIds.count(),
numberOfDocuments = recordSetIds
.map { value -> elasticSearchWrapper.countNumberOfDocuments(value) }
.sum().toInt(),
teaserColor = institution[KEYS.teaserColor].let {
if (it == null) {
log.error("No teaser colour found for institution $id.")
""
} else
it as String
},
teaserColorComputed = institution[KEYS.teaserColorComputed].let {
if (it == null) {
log.error("No computed teaser colour found for institution $id.")
""
} else
it as String
},
lastUpdatedDate = Date.now
)
}
......
......@@ -38,11 +38,12 @@ import java.io.StringWriter
class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("SearchDocService")
private val appSettings = settings.appSettings
private val reportTopic = settings.processReportTopic
private val searchDocTransform = DocumentsSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val searchDocTransform = DocumentsSearchDocBuilder(appSettings.getProperty(KEYS.SettingsProps.mediaUrl))
private val institutionSearchDoc =
InstitutionSearchDocBuilder(settings.appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath))
InstitutionSearchDocBuilder(appSettings.getProperty(KEYS.SettingsProps.institutionTypeLabelsPath), appSettings)
private val jsonWriter = ObjectMapper().registerKotlinModule().writer()
......
......@@ -28,12 +28,15 @@ class Service(file: String = "app.yml") {
private val log = LogManager.getLogger("SearchDocService")
val settings = SettingsLoader(
listOf(
KEYS.SettingsProps.institutionTypeLabelsPath,
KEYS.SettingsProps.mediaUrl
),
file,
useStreamsConfig = true
listOf(
KEYS.SettingsProps.institutionTypeLabelsPath,
KEYS.SettingsProps.mediaUrl,
KEYS.SettingsProps.elasticHost,
KEYS.SettingsProps.elasticPort,
KEYS.SettingsProps.elasticIndex
),
file,
useStreamsConfig = true
)
val topology = KafkaTopology(settings).build()
......
package org.memobase.helpers
import java.util.Properties
import org.apache.http.HttpHost
import org.apache.logging.log4j.LogManager
import org.elasticsearch.ElasticsearchException
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.core.CountRequest
import org.elasticsearch.client.indices.GetIndexRequest
import org.elasticsearch.index.query.QueryBuilders
/**
 * Wrapper around an Elasticsearch [RestHighLevelClient] that counts the documents
 * belonging to a record set in the configured documents index.
 *
 * The connection is attempted exactly once, while the instance is constructed. When the
 * cluster cannot be reached or the configured index does not exist, the wrapper stays
 * disconnected and [countNumberOfDocuments] reports zero for every request.
 *
 * @param settings application properties; the values stored under
 * [KEYS.SettingsProps.elasticHost], [KEYS.SettingsProps.elasticPort] and
 * [KEYS.SettingsProps.elasticIndex] are read. The port value must parse as an integer.
 */
class ElasticSearchWrapper(settings: Properties) {
    private val log = LogManager.getLogger("ElasticSearchWrapper")
    private val host = settings.getProperty(KEYS.SettingsProps.elasticHost)
    private val port = settings.getProperty(KEYS.SettingsProps.elasticPort).toInt()
    private val documentsIndex = settings.getProperty(KEYS.SettingsProps.elasticIndex)

    // Must stay below the properties above: connect() reads them during initialisation.
    private val client = connect()

    /**
     * Creates a client and verifies that the configured index exists.
     *
     * @return the connected client, or `null` when the index is missing or the check failed.
     * A client that turns out to be unusable is closed before `null` is returned, so its
     * underlying HTTP resources are not leaked.
     */
    private fun connect(): RestHighLevelClient? {
        var candidate: RestHighLevelClient? = null
        return try {
            candidate = RestHighLevelClient(
                RestClient.builder(
                    HttpHost(host, port)
                )
            )
            if (candidate.indices().exists(GetIndexRequest(documentsIndex), RequestOptions.DEFAULT)) {
                log.info("Successfully connected to index $documentsIndex. Ready to query.")
                candidate
            } else {
                log.error("Could not find the index defined in the configuration: $documentsIndex.")
                closeQuietly(candidate)
                null
            }
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage)
            closeQuietly(candidate)
            null
        } catch (ex: java.io.IOException) {
            // Transport-level failures (e.g. connection refused) surface as IOException,
            // not as ElasticsearchException.
            log.error("Could not reach Elasticsearch at $host:$port: ${ex.message}", ex)
            closeQuietly(candidate)
            null
        }
    }

    /** Closes a client that will not be used; a failure to close is logged and otherwise ignored. */
    private fun closeQuietly(unusable: RestHighLevelClient?) {
        try {
            unusable?.close()
        } catch (ex: java.io.IOException) {
            log.warn("Failed to close unused Elasticsearch client: ${ex.message}", ex)
        }
    }

    /**
     * Counts the documents whose `recordSet.facet` field matches the given identifier.
     *
     * @param recordSetIdentifier value used in the term query on `recordSet.facet`.
     * @return the number of matching documents, capped at [Int.MAX_VALUE]; `0` when the
     * wrapper is not connected or the request fails (the failure is logged).
     */
    fun countNumberOfDocuments(recordSetIdentifier: String): Int {
        val connected = client
        if (connected == null) {
            log.error("Not connected to an index. Count is at zero! Restart service to retry connection.")
            return 0
        }

        val request = CountRequest(documentsIndex)
        request.query(
            QueryBuilders.termQuery(
                "recordSet.facet", recordSetIdentifier
            )
        )

        return try {
            val response = connected.count(request, RequestOptions.DEFAULT)
            // The count is a Long; cap it instead of letting toInt() wrap around silently.
            response.count.coerceAtMost(Int.MAX_VALUE.toLong()).toInt()
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage)
            0
        } catch (ex: java.io.IOException) {
            log.error("Count request for record set $recordSetIdentifier failed: ${ex.message}", ex)
            0
        }
    }
}
\ No newline at end of file
......@@ -22,6 +22,9 @@ object KEYS {
object SettingsProps {
const val mediaUrl = "media.url"
const val institutionTypeLabelsPath = "institutionTypeLabelsPath"
const val elasticHost = "elastic.host"
const val elasticPort = "elastic.port"
const val elasticIndex = "elastic.index"
}
const val entityId = "@id"
......
app:
elastic:
host: ${ELASTIC_HOST:?system}
port: ${ELASTIC_PORT:?system}
index: ${ELASTIC_INDEX:?system}
media:
url: ${MEDIA_SERVER_URL:?system}
institutionTypeLabelsPath: "/configs/institution_types/labels.csv"
......
app:
elastic:
host: localhost
port: 9200
index: documents
media:
url: "https://media.memobase.k8s.unibas.ch/memo/"
institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment