/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import ch.memobase.rdf.NS
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.Constants
import org.memobase.helpers.JsonUtility
import org.memobase.helpers.TranslationMappers
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.FacetContainer
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema
/**
 * Assembles a [DocumentsSearchDoc] from the entities belonging to one record.
 *
 * @property translationMappers used to translate access terms, document types and reuse statements.
 * @property elasticSearchWrapper used to look up institution names, the record set name and the
 * extra (access / original / master) institutions of a record set.
 * @property mediaUrl prefix that is prepended to the main identifier of the digital object to form
 * the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Builds the search document for the record contained in [input].
     *
     * @param key identifier of the record; it is stored as `recordId`, passed to every field builder
     * and used in log messages.
     * @param input all entities of the message. The record itself is read from the entry stored under
     * [JsonUtility.recordTag]; the remaining entries are offered to the field builders.
     * NOTE(review): the keys are presumably the entity identifiers, since they are compared against
     * the identifiers referenced by `identifiedBy` — confirm against the producer of this map.
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no entry under [JsonUtility.recordTag].
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input[JsonUtility.recordTag] ?: throw InvalidInputException("No record defined in the message.")

        // The digital and the physical instantiation are both optional.
        val instantiationType = NS.rico + "Instantiation"
        val digitalObject =
            input.values.firstOrNull { it["@type"] == instantiationType && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == instantiationType && it["type"] == "physicalObject" }

        // Identifiers referenced by the record, grouped by the property that references them.
        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder =
            AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder =
            AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder =
            AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        // Entities referenced by the digital object's `identifiedBy`, each collected exactly once and
        // in the iteration order of `input`.
        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = input.filterKeys { it in digitalIdentifierReferences }.values.toList()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once; every entity is offered to every builder in this order.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )
        for (entity in input.values) {
            for (builder in fieldBuilders) {
                // A builder only consumes the entities its own filter accepts.
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)
        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)
        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }
            .map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }
            .map { translationMappers.getAccessTerm(it) }
        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs")
            .flatMap { it.toList() }

        // Media URL + main identifier of the digital object; empty when there is no such identifier.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, Constants.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        val mediaLocation = if (addLocator) {
            // NOTE(review): relies on checkLocator returning false when there is no digital object;
            // otherwise the non-null assertion below throws — confirm against Filter.checkLocator.
            if (Filter.checkSftpPrefix(digitalObject!!)) "local" else "remote"
        } else {
            null
        }

        // Technical metadata of the digital object; default-constructed when there is none.
        val digitalObjectValues = if (digitalObject == null) {
            EnrichedDigitalMetadata()
        } else {
            val width = digitalObject.getOrDefault("width", "") as String
            val height = digitalObject.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digitalObject.getOrDefault("orientation", "") as String,
                hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digitalObject["componentColor"])
            )
        }

        // Records without an explicit type are mapped via the "Andere" document type.
        val type = translationMappers.getDocumentType((record[Constants.ricoType] as String?) ?: "Andere")
        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,
            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),
            // Merge the labels of all subjects into one container; EMPTY when there are no subjects.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),
            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),
            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),
            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,
            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },
            digital = digitalObjectValues,
            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "access"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "original"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "master"),
            published = (record[Constants.isPublished] as Boolean?) ?: false,
            // NOTE(review): assumes build() always yields at least one element — TODO confirm.
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}