Commit 9337edd0 authored by Jonas Waeber
Browse files

Fix institution search doc es request.

parent fb7a5e35
Pipeline #20540 passed with stages
in 5 minutes and 21 seconds
...@@ -80,7 +80,8 @@ class InstitutionSearchDocBuilder( ...@@ -80,7 +80,8 @@ class InstitutionSearchDocBuilder(
val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound") val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound")
val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound") val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound")
val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound" val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound"
val recordSetIds = Extract.listOfStrings(institution[KEYS.isHolderOf]) val recordSetUris = Extract.listOfStrings(institution[KEYS.isHolderOf])
val recordSetIds = recordSetUris.map { it.substringAfterLast("/") }
return InstitutionSearchDoc( return InstitutionSearchDoc(
institutionId = id, institutionId = id,
......
...@@ -59,7 +59,7 @@ class ElasticSearchWrapper( ...@@ -59,7 +59,7 @@ class ElasticSearchWrapper(
* @return Number of Documents * @return Number of Documents
*/ */
fun countNumberOfDocuments(recordSetIdentifier: String): Int { fun countNumberOfDocuments(recordSetIdentifier: String): Int {
log.info("Counting documents for record set $recordSetIdentifier.") log.info("Counting documents for record set $recordSetIdentifier from index $documentsIndex.")
val request = CountRequest(documentsIndex) val request = CountRequest(documentsIndex)
request.query( request.query(
termQuery( termQuery(
...@@ -71,7 +71,7 @@ class ElasticSearchWrapper( ...@@ -71,7 +71,7 @@ class ElasticSearchWrapper(
request, RequestOptions.DEFAULT request, RequestOptions.DEFAULT
) )
val count = response.count.toInt() val count = response.count.toInt()
log.info("Found $count documents for record set $recordSetIdentifier.") log.info("Found $count documents for record set $recordSetIdentifier from index $documentsIndex.")
count count
} catch (ex: ElasticsearchException) { } catch (ex: ElasticsearchException) {
log.error(ex.detailedMessage) log.error(ex.detailedMessage)
...@@ -89,7 +89,7 @@ class ElasticSearchWrapper( ...@@ -89,7 +89,7 @@ class ElasticSearchWrapper(
*/ */
fun getDocumentTypesFromRecords(recordSetIdentifier: String, queryField: String): List<FacetContainer> { fun getDocumentTypesFromRecords(recordSetIdentifier: String, queryField: String): List<FacetContainer> {
return try { return try {
log.info("Attempting to load document type for $recordSetIdentifier in field $queryField.") log.info("Attempting to load document type for $recordSetIdentifier in field $queryField from index $documentsIndex.")
val resultFacets = mutableListOf<FacetContainer>() val resultFacets = mutableListOf<FacetContainer>()
val typeSet = mutableSetOf<String>() val typeSet = mutableSetOf<String>()
val scroll = Scroll(TimeValue.timeValueMinutes(1L)) val scroll = Scroll(TimeValue.timeValueMinutes(1L))
...@@ -136,7 +136,7 @@ class ElasticSearchWrapper( ...@@ -136,7 +136,7 @@ class ElasticSearchWrapper(
val clearScrollRequest = ClearScrollRequest() val clearScrollRequest = ClearScrollRequest()
clearScrollRequest.addScrollId(scrollId) clearScrollRequest.addScrollId(scrollId)
client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT) client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT)
log.info("Found the following types $typeSet for record set $recordSetIdentifier.") log.info("Found the following types $typeSet for record set $recordSetIdentifier from index $documentsIndex.")
resultFacets resultFacets
} catch (ex: ElasticsearchException) { } catch (ex: ElasticsearchException) {
log.error(ex.detailedMessage) log.error(ex.detailedMessage)
......
...@@ -15,7 +15,10 @@ import org.junit.jupiter.api.Disabled ...@@ -15,7 +15,10 @@ import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll import org.junit.jupiter.api.assertAll
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.JSON
import org.memobase.model.FacetContainer import org.memobase.model.FacetContainer
import org.memobase.model.InstitutionSearchDoc
import org.memobase.model.LanguageContainer import org.memobase.model.LanguageContainer
@TestInstance(TestInstance.Lifecycle.PER_CLASS) @TestInstance(TestInstance.Lifecycle.PER_CLASS)
...@@ -31,16 +34,48 @@ class TestInstitutionSearchDoc { ...@@ -31,16 +34,48 @@ class TestInstitutionSearchDoc {
@Test @Test
fun `test institution search doc`() { @Disabled
fun `test institution search doc with production es client`() {
val props = App.createSettings("kafkaTest1.yml")
val elastic = ElasticSearchWrapper(
props.appSettings,
TestUtilities.connectToElasticSearch("localhost", 8080, "documents-v17"),
TestUtilities.translationMappers
)
val input = JSON.unpack(JSON.parse(readFile("input-prod.json")))
val builder = InstitutionSearchDocBuilder(TestUtilities.translationMappers, elastic)
val output = builder.transform("aag", input) as InstitutionSearchDoc
assertAll("",
{
assertThat(output.numberOfDocuments).isEqualTo(102)
},
{
assertThat(output.documentType).isEqualTo(
listOf(
FacetContainer(
LanguageContainer(listOf("Fotografie"), listOf("Photographie"), listOf("Fotografia"), emptyList()),
"Foto",
emptyList()
)
)
)
}
)
} }
@Test @Test
fun `integration test institution`() { fun `integration test institution`() {
val settings = App.createSettings("kafkaTest1.yml") val settings = App.createSettings("kafkaTest1.yml")
every { TestUtilities.elasticSearchWrapperMocked.countNumberOfDocuments("https://memobase.ch/recordSet/sts-001") } returns 123 every { TestUtilities.elasticSearchWrapperMocked.countNumberOfDocuments("sts-001") } returns 123
every { TestUtilities.elasticSearchWrapperMocked.getDocumentTypesFromRecords("https://memobase.ch/recordSet/sts-001", "recordSet.facet") } returns listOf( every {
TestUtilities.elasticSearchWrapperMocked.getDocumentTypesFromRecords(
"sts-001",
"recordSet.facet"
)
} returns listOf(
FacetContainer( FacetContainer(
LanguageContainer(listOf("Foto"), listOf("Foto"), listOf("Foto"), emptyList()), "Foto", emptyList() LanguageContainer(listOf("Foto"), listOf("Foto"), listOf("Foto"), emptyList()), "Foto", emptyList()
) )
......
package org.memobase package org.memobase
import io.mockk.mockk import io.mockk.mockk
import java.net.ConnectException
import java.net.SocketTimeoutException
import kotlin.system.exitProcess
import org.apache.http.HttpHost
import org.apache.logging.log4j.LogManager
import org.elasticsearch.ElasticsearchException
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.indices.GetIndexRequest
import org.memobase.helpers.ElasticSearchWrapper import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.TranslationMappers import org.memobase.helpers.TranslationMappers
object TestUtilities { object TestUtilities {
private val log = LogManager.getLogger("TestUtils")
private const val institutionTypePath = "src/test/resources/configs/institution-type-labels.csv" private const val institutionTypePath = "src/test/resources/configs/institution-type-labels.csv"
private const val accessTermPath = "src/test/resources/configs/access-term-labels.csv" private const val accessTermPath = "src/test/resources/configs/access-term-labels.csv"
private const val documentTypePath = "src/test/resources/configs/document-type-labels.csv" private const val documentTypePath = "src/test/resources/configs/document-type-labels.csv"
...@@ -18,4 +29,34 @@ object TestUtilities { ...@@ -18,4 +29,34 @@ object TestUtilities {
val elasticSearchClient = mockk<RestHighLevelClient>() val elasticSearchClient = mockk<RestHighLevelClient>()
val elasticSearchWrapperMocked = mockk<ElasticSearchWrapper>() val elasticSearchWrapperMocked = mockk<ElasticSearchWrapper>()
/**
 * Opens a [RestHighLevelClient] against the Elasticsearch node at [host]:[port] and verifies that
 * [documentsIndex] is known to the cluster either as an index or as an alias.
 *
 * The process is terminated with exit code 1 when neither an index nor an alias of that name is
 * found, or when an [ElasticsearchException], [SocketTimeoutException] or [ConnectException] is
 * raised while connecting or checking.
 *
 * @param host host name of the Elasticsearch node.
 * @param port port of the Elasticsearch node.
 * @param documentsIndex name of the index or alias that has to exist.
 * @return the client, once the index or alias has been confirmed.
 */
fun connectToElasticSearch(host: String, port: Int, documentsIndex: String): RestHighLevelClient {
    try {
        val client = RestHighLevelClient(RestClient.builder(HttpHost(host, port)))
        // Both lookups are always sent, index first and alias second, before the result is evaluated.
        val foundAsIndex = client.indices().exists(GetIndexRequest(documentsIndex), RequestOptions.DEFAULT)
        val foundAsAlias = client.indices().existsAlias(GetAliasesRequest(documentsIndex), RequestOptions.DEFAULT)
        if (foundAsIndex || foundAsAlias) {
            log.info("Successfully connected to index $documentsIndex. Ready to query.")
            return client
        }
        log.error("Could not find the index or alias defined in the configuration: $documentsIndex.")
    } catch (ex: ElasticsearchException) {
        log.error(ex.detailedMessage)
    } catch (ex: SocketTimeoutException) {
        log.error(ex.localizedMessage)
    } catch (ex: ConnectException) {
        log.error(ex.localizedMessage)
    }
    // Every path that did not return the client has logged its failure and ends here.
    exitProcess(1)
}
} }
\ No newline at end of file
{
"@graph": [
{
"@id": "_:b0",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"P131": [
"_:b1",
"_:b2"
],
"P17": "http://www.wikidata.org/entity/Q39",
"P281": "5001",
"P625": "8.049455603, 47.38787196",
"P6375": "Entfelderstrasse 22",
"P669": "Entfelderstrasse",
"P670": "22"
},
{
"@id": "_:b1",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"sameAs": "http://www.wikidata.org/entity/Q11972",
"name": [
{
"@language": "de",
"@value": "Aargau"
},
{
"@language": "fr",
"@value": "Argovie"
},
{
"@language": "it",
"@value": "Argovia"
}
],
"type": "canton"
},
{
"@id": "_:b2",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"name": "Aarau",
"type": "municipality"
},
{
"@id": "_:b3",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "aag",
"type": "main"
},
{
"@id": "_:b4",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "StAAG",
"type": "oldMemobase"
},
{
"@id": "https://memobase.ch/institution/aag",
"@type": "https://www.ica.org/standards/RiC/ontology#CorporateBody",
"P2699": "https://www.ag.ch/staatsarchiv/suche/suchinfo.aspx",
"P31": "http://www.wikidata.org/entity/Q166118",
"P791": "CH-000051-7",
"P856": "https://www.ag.ch/staatsarchiv",
"P968": "staatsarchiv@ag.ch",
"eventType": "CREATE",
"isPublished": true,
"descriptiveNote": {
"@language": "de",
"@value": "<p>Das Staatsarchiv Aargau ist das zentrale Archiv des Kantons und seiner Rechtsvorgänger. Es sammelt das archivalische Kulturgut des Kantons und sorgt für die fachgerechte Aufbewahrung, Erschliessung und Vermittlung. Seine Bestände reichen vom Hochmittelalter bis in die Gegenwart. Neben wertvollen Urkunden und Zeugnissen vormoderner Herrschaftsfixierung bewahrt das Staatsarchiv die systematische Dokumentation der Verwaltungstätigkeit seit der Kantonsgründung 1803. Seine Kernbestände ergänzt es durch Nachlässe privater Herkunft, die von öffentlichem Interesse sind. Seit 2009 ist das Staatsarchiv Aargau im Besitz der Fotobestände aus dem Pressehaus Ringier. Diese umfassen ca. 6-7 Mio Bilder. Die Bilder decken ungefähr den Zeitraum 1930-2000 ab, mit einem Schwerpunkt auf der Zeit zwischen 1950-2000.</p>"
},
"hasLocation": "_:b0",
"identifiedBy": [
"_:b3",
"_:b4"
],
"isHolderOf": "https://memobase.ch/recordSet/aag-001",
"name": [
{
"@language": "de",
"@value": "Staatsarchiv des Kantons Aargau"
},
{
"@language": "it",
"@value": "Archivio cantonale del Argovia"
},
{
"@language": "fr",
"@value": "Archives de l’Etat Argovie"
}
],
"type": "memobaseInstitution"
}
],
"@context": {
"P669": {
"@id": "http://www.wikidata.org/prop/direct/P669"
},
"P670": {
"@id": "http://www.wikidata.org/prop/direct/P670"
},
"P6375": {
"@id": "http://www.wikidata.org/prop/direct/P6375"
},
"P281": {
"@id": "http://www.wikidata.org/prop/direct/P281"
},
"P625": {
"@id": "http://www.wikidata.org/prop/direct/P625"
},
"P131": {
"@id": "http://www.wikidata.org/prop/direct/P131",
"@type": "@id"
},
"P17": {
"@id": "http://www.wikidata.org/prop/direct/P17",
"@type": "@id"
},
"name": {
"@id": "https://www.ica.org/standards/RiC/ontology#name"
},
"type": {
"@id": "https://www.ica.org/standards/RiC/ontology#type"
},
"identifiedBy": {
"@id": "https://www.ica.org/standards/RiC/ontology#identifiedBy",
"@type": "@id"
},
"P968": {
"@id": "http://www.wikidata.org/prop/direct/P968"
},
"eventType": {
"@id": "https://memobase.ch/internal/eventType"
},
"isHolderOf": {
"@id": "https://www.ica.org/standards/RiC/ontology#isHolderOf"
},
"hasLocation": {
"@id": "https://www.ica.org/standards/RiC/ontology#hasLocation",
"@type": "@id"
},
"isPublished": {
"@id": "https://memobase.ch/internal/isPublished",
"@type": "http://www.w3.org/2001/XMLSchema#boolean"
},
"P791": {
"@id": "http://www.wikidata.org/prop/direct/P791"
},
"descriptiveNote": {
"@id": "https://www.ica.org/standards/RiC/ontology#descriptiveNote"
},
"P2699": {
"@id": "http://www.wikidata.org/prop/direct/P2699"
},
"P856": {
"@id": "http://www.wikidata.org/prop/direct/P856"
},
"P31": {
"@id": "http://www.wikidata.org/prop/direct/P31",
"@type": "@id"
},
"sameAs": {
"@id": "http://schema.org/sameAs"
},
"identifier": {
"@id": "https://www.ica.org/standards/RiC/ontology#identifier"
}
}
}
\ No newline at end of file
...@@ -2,8 +2,8 @@ app: ...@@ -2,8 +2,8 @@ app:
documentTypeLabelsPath: "src/test/resources/configs/document-type-labels.csv" documentTypeLabelsPath: "src/test/resources/configs/document-type-labels.csv"
elastic: elastic:
host: localhost host: localhost
port: 2020 port: 8080
index: documents index: documents-v17
media: media:
url: "https://media.memobase.k8s.unibas.ch/memo/" url: "https://media.memobase.k8s.unibas.ch/memo/"
institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv" institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment