Commit 9337edd0 authored by Jonas Waeber

Fix institution search doc es request.

parent fb7a5e35
Pipeline #20540 passed with stages
in 5 minutes and 21 seconds
......@@ -80,7 +80,8 @@ class InstitutionSearchDocBuilder(
val name = extractLanguageContainer(institution[KEYS.name], "NoNameFound")
val description = extractLanguageContainer(institution[KEYS.descriptiveNote], "NoDescriptionFound")
val id = Extract.extractIdValue(identifiers, KEYS.IdentifierType.main) ?: "NoIdentifierFound"
val recordSetIds = Extract.listOfStrings(institution[KEYS.isHolderOf])
val recordSetUris = Extract.listOfStrings(institution[KEYS.isHolderOf])
val recordSetIds = recordSetUris.map { it.substringAfterLast("/") }
return InstitutionSearchDoc(
institutionId = id,
......
......@@ -59,7 +59,7 @@ class ElasticSearchWrapper(
* @return Number of Documents
*/
fun countNumberOfDocuments(recordSetIdentifier: String): Int {
log.info("Counting documents for record set $recordSetIdentifier.")
log.info("Counting documents for record set $recordSetIdentifier from index $documentsIndex.")
val request = CountRequest(documentsIndex)
request.query(
termQuery(
......@@ -71,7 +71,7 @@ class ElasticSearchWrapper(
request, RequestOptions.DEFAULT
)
val count = response.count.toInt()
log.info("Found $count documents for record set $recordSetIdentifier.")
log.info("Found $count documents for record set $recordSetIdentifier from index $documentsIndex.")
count
} catch (ex: ElasticsearchException) {
log.error(ex.detailedMessage)
......@@ -89,7 +89,7 @@ class ElasticSearchWrapper(
*/
fun getDocumentTypesFromRecords(recordSetIdentifier: String, queryField: String): List<FacetContainer> {
return try {
log.info("Attempting to load document type for $recordSetIdentifier in field $queryField.")
log.info("Attempting to load document type for $recordSetIdentifier in field $queryField from index $documentsIndex.")
val resultFacets = mutableListOf<FacetContainer>()
val typeSet = mutableSetOf<String>()
val scroll = Scroll(TimeValue.timeValueMinutes(1L))
......@@ -136,7 +136,7 @@ class ElasticSearchWrapper(
val clearScrollRequest = ClearScrollRequest()
clearScrollRequest.addScrollId(scrollId)
client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT)
log.info("Found the following types $typeSet for record set $recordSetIdentifier.")
log.info("Found the following types $typeSet for record set $recordSetIdentifier from index $documentsIndex.")
resultFacets
} catch (ex: ElasticsearchException) {
log.error(ex.detailedMessage)
......
......@@ -15,7 +15,10 @@ import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.JSON
import org.memobase.model.FacetContainer
import org.memobase.model.InstitutionSearchDoc
import org.memobase.model.LanguageContainer
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
......@@ -31,16 +34,48 @@ class TestInstitutionSearchDoc {
@Test
fun `test institution search doc`() {
@Disabled
fun `test institution search doc with production es client`() {
    // Disabled: this test opens a real connection through TestUtilities.connectToElasticSearch
    // (localhost:8080, index "documents-v17") instead of using the mocked wrapper.
    // NOTE(review): presumably this targets a port-forwarded production cluster — confirm.
    val settings = App.createSettings("kafkaTest1.yml")
    val esWrapper = ElasticSearchWrapper(
        settings.appSettings,
        TestUtilities.connectToElasticSearch("localhost", 8080, "documents-v17"),
        TestUtilities.translationMappers
    )

    // Load the institution fixture and run it through the builder under the key "aag".
    val institutionInput = JSON.unpack(JSON.parse(readFile("input-prod.json")))
    val searchDocBuilder = InstitutionSearchDocBuilder(TestUtilities.translationMappers, esWrapper)
    val searchDoc = searchDocBuilder.transform("aag", institutionInput) as InstitutionSearchDoc

    // The single document-type facet the test expects to come back from the index.
    val photoLabels = LanguageContainer(
        listOf("Fotografie"),
        listOf("Photographie"),
        listOf("Fotografia"),
        emptyList()
    )
    val expectedDocumentTypes = listOf(FacetContainer(photoLabels, "Foto", emptyList()))

    // assertAll evaluates both checks and reports every failure together.
    assertAll(
        "",
        { assertThat(searchDoc.numberOfDocuments).isEqualTo(102) },
        { assertThat(searchDoc.documentType).isEqualTo(expectedDocumentTypes) }
    )
}
@Test
fun `integration test institution`() {
val settings = App.createSettings("kafkaTest1.yml")
every { TestUtilities.elasticSearchWrapperMocked.countNumberOfDocuments("https://memobase.ch/recordSet/sts-001") } returns 123
every { TestUtilities.elasticSearchWrapperMocked.getDocumentTypesFromRecords("https://memobase.ch/recordSet/sts-001", "recordSet.facet") } returns listOf(
every { TestUtilities.elasticSearchWrapperMocked.countNumberOfDocuments("sts-001") } returns 123
every {
TestUtilities.elasticSearchWrapperMocked.getDocumentTypesFromRecords(
"sts-001",
"recordSet.facet"
)
} returns listOf(
FacetContainer(
LanguageContainer(listOf("Foto"), listOf("Foto"), listOf("Foto"), emptyList()), "Foto", emptyList()
)
......
package org.memobase
import io.mockk.mockk
import java.net.ConnectException
import java.net.SocketTimeoutException
import kotlin.system.exitProcess
import org.apache.http.HttpHost
import org.apache.logging.log4j.LogManager
import org.elasticsearch.ElasticsearchException
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.indices.GetIndexRequest
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.TranslationMappers
object TestUtilities {
private val log = LogManager.getLogger("TestUtils")
private const val institutionTypePath = "src/test/resources/configs/institution-type-labels.csv"
private const val accessTermPath = "src/test/resources/configs/access-term-labels.csv"
private const val documentTypePath = "src/test/resources/configs/document-type-labels.csv"
......@@ -18,4 +29,34 @@ object TestUtilities {
val elasticSearchClient = mockk<RestHighLevelClient>()
val elasticSearchWrapperMocked = mockk<ElasticSearchWrapper>()
/**
 * Builds a [RestHighLevelClient] for `host:port` and verifies that [documentsIndex]
 * exists on the cluster, either as an index or as an alias.
 *
 * Both existence lookups are always issued. When neither succeeds, or when an
 * [ElasticsearchException], [SocketTimeoutException] or [ConnectException] is raised,
 * the problem is logged and the JVM is terminated with `exitProcess(1)`.
 *
 * @param host host name passed to [HttpHost].
 * @param port port passed to [HttpHost].
 * @param documentsIndex name looked up as an index and as an alias.
 * @return the client, once the index or alias has been found.
 */
fun connectToElasticSearch(host: String, port: Int, documentsIndex: String): RestHighLevelClient {
    try {
        val endpoint = HttpHost(host, port)
        val client = RestHighLevelClient(RestClient.builder(endpoint))

        // Evaluate both lookups up front so each request is sent regardless of the other's result.
        val foundAsIndex = client.indices().exists(GetIndexRequest(documentsIndex), RequestOptions.DEFAULT)
        val foundAsAlias = client.indices().existsAlias(GetAliasesRequest(documentsIndex), RequestOptions.DEFAULT)

        if (foundAsIndex || foundAsAlias) {
            log.info("Successfully connected to index $documentsIndex. Ready to query.")
            return client
        }
        log.error("Could not find the index or alias defined in the configuration: $documentsIndex.")
    } catch (ex: ElasticsearchException) {
        log.error(ex.detailedMessage)
    } catch (ex: SocketTimeoutException) {
        log.error(ex.localizedMessage)
    } catch (ex: ConnectException) {
        log.error(ex.localizedMessage)
    }
    // Every path that did not return a client ends here: abort the process.
    exitProcess(1)
}
}
\ No newline at end of file
{
"@graph": [
{
"@id": "_:b0",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"P131": [
"_:b1",
"_:b2"
],
"P17": "http://www.wikidata.org/entity/Q39",
"P281": "5001",
"P625": "8.049455603, 47.38787196",
"P6375": "Entfelderstrasse 22",
"P669": "Entfelderstrasse",
"P670": "22"
},
{
"@id": "_:b1",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"sameAs": "http://www.wikidata.org/entity/Q11972",
"name": [
{
"@language": "de",
"@value": "Aargau"
},
{
"@language": "fr",
"@value": "Argovie"
},
{
"@language": "it",
"@value": "Argovia"
}
],
"type": "canton"
},
{
"@id": "_:b2",
"@type": "https://www.ica.org/standards/RiC/ontology#Place",
"name": "Aarau",
"type": "municipality"
},
{
"@id": "_:b3",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "aag",
"type": "main"
},
{
"@id": "_:b4",
"@type": "https://www.ica.org/standards/RiC/ontology#Identifier",
"identifier": "StAAG",
"type": "oldMemobase"
},
{
"@id": "https://memobase.ch/institution/aag",
"@type": "https://www.ica.org/standards/RiC/ontology#CorporateBody",
"P2699": "https://www.ag.ch/staatsarchiv/suche/suchinfo.aspx",
"P31": "http://www.wikidata.org/entity/Q166118",
"P791": "CH-000051-7",
"P856": "https://www.ag.ch/staatsarchiv",
"P968": "staatsarchiv@ag.ch",
"eventType": "CREATE",
"isPublished": true,
"descriptiveNote": {
"@language": "de",
"@value": "<p>Das Staatsarchiv Aargau ist das zentrale Archiv des Kantons und seiner Rechtsvorgänger. Es sammelt das archivalische Kulturgut des Kantons und sorgt für die fachgerechte Aufbewahrung, Erschliessung und Vermittlung. Seine Bestände reichen vom Hochmittelalter bis in die Gegenwart. Neben wertvollen Urkunden und Zeugnissen vormoderner Herrschaftsfixierung bewahrt das Staatsarchiv die systematische Dokumentation der Verwaltungstätigkeit seit der Kantonsgründung 1803. Seine Kernbestände ergänzt es durch Nachlässe privater Herkunft, die von öffentlichem Interesse sind. Seit 2009 ist das Staatsarchiv Aargau im Besitz der Fotobestände aus dem Pressehaus Ringier. Diese umfassen ca. 6-7 Mio Bilder. Die Bilder decken ungefähr den Zeitraum 1930-2000 ab, mit einem Schwerpunkt auf der Zeit zwischen 1950-2000.</p>"
},
"hasLocation": "_:b0",
"identifiedBy": [
"_:b3",
"_:b4"
],
"isHolderOf": "https://memobase.ch/recordSet/aag-001",
"name": [
{
"@language": "de",
"@value": "Staatsarchiv des Kantons Aargau"
},
{
"@language": "it",
"@value": "Archivio cantonale del Argovia"
},
{
"@language": "fr",
"@value": "Archives de l’Etat Argovie"
}
],
"type": "memobaseInstitution"
}
],
"@context": {
"P669": {
"@id": "http://www.wikidata.org/prop/direct/P669"
},
"P670": {
"@id": "http://www.wikidata.org/prop/direct/P670"
},
"P6375": {
"@id": "http://www.wikidata.org/prop/direct/P6375"
},
"P281": {
"@id": "http://www.wikidata.org/prop/direct/P281"
},
"P625": {
"@id": "http://www.wikidata.org/prop/direct/P625"
},
"P131": {
"@id": "http://www.wikidata.org/prop/direct/P131",
"@type": "@id"
},
"P17": {
"@id": "http://www.wikidata.org/prop/direct/P17",
"@type": "@id"
},
"name": {
"@id": "https://www.ica.org/standards/RiC/ontology#name"
},
"type": {
"@id": "https://www.ica.org/standards/RiC/ontology#type"
},
"identifiedBy": {
"@id": "https://www.ica.org/standards/RiC/ontology#identifiedBy",
"@type": "@id"
},
"P968": {
"@id": "http://www.wikidata.org/prop/direct/P968"
},
"eventType": {
"@id": "https://memobase.ch/internal/eventType"
},
"isHolderOf": {
"@id": "https://www.ica.org/standards/RiC/ontology#isHolderOf"
},
"hasLocation": {
"@id": "https://www.ica.org/standards/RiC/ontology#hasLocation",
"@type": "@id"
},
"isPublished": {
"@id": "https://memobase.ch/internal/isPublished",
"@type": "http://www.w3.org/2001/XMLSchema#boolean"
},
"P791": {
"@id": "http://www.wikidata.org/prop/direct/P791"
},
"descriptiveNote": {
"@id": "https://www.ica.org/standards/RiC/ontology#descriptiveNote"
},
"P2699": {
"@id": "http://www.wikidata.org/prop/direct/P2699"
},
"P856": {
"@id": "http://www.wikidata.org/prop/direct/P856"
},
"P31": {
"@id": "http://www.wikidata.org/prop/direct/P31",
"@type": "@id"
},
"sameAs": {
"@id": "http://schema.org/sameAs"
},
"identifier": {
"@id": "https://www.ica.org/standards/RiC/ontology#identifier"
}
}
}
\ No newline at end of file
......@@ -2,8 +2,8 @@ app:
documentTypeLabelsPath: "src/test/resources/configs/document-type-labels.csv"
elastic:
host: localhost
port: 2020
index: documents
port: 8080
index: documents-v17
media:
url: "https://media.memobase.k8s.unibas.ch/memo/"
institutionTypeLabelsPath: "src/test/resources/configs/institution-type-labels.csv"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment