/*
* search-doc-service
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.memobase
import ch.memobase.rdf.NS
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.*
import org.memobase.helpers.*
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema
/**
 * Assembles a [DocumentsSearchDoc] from the entities delivered for one record.
 *
 * @param mediaUrl prefix that is put in front of the main identifier value of the digital object
 * to form the `locator` field.
 */
class DocumentsSearchDocBuilder(private val mediaUrl: String) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Builds the search document for the record contained in [input].
     *
     * The entity stored under the key `"record"` is the record itself. The digital and physical
     * objects are the first values typed as a rico `Instantiation` whose `"type"` is
     * `"digitalObject"` / `"physicalObject"`; both are optional. Every entity in [input] is offered
     * to each field builder, which decides through its `filter` whether to consume it.
     *
     * @param input the entities of one message; the digital identifier lookup matches the map keys
     * against the identifiers referenced by the digital object.
     * @return the assembled [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `"record"` entry.
     */
    fun transform(input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = findInstantiation(input, "digitalObject")
        val physicalObject = findInstantiation(input, "physicalObject")

        // Identifiers referenced by the record; they tell the builders which entities to pick up.
        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel)

        val id = record["@id"] as String

        // Built once: the set of builders does not depend on the entity being processed.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((key, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(id, entity)
                }
            }
            // Checked once per entity, not once per builder, so an identifier entity is
            // collected a single time instead of being duplicated for every builder.
            if (digitalIdentifierReferences.contains(key)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)
        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)
        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }
        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // The locator stays empty when there is no main identifier for the digital object.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        // Without a digital object the metadata container keeps its constructor defaults.
        val digitalObjectValues = digitalObject?.let { digital ->
            val width = digital.getOrDefault("width", "") as String
            val height = digital.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digital.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digital.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digital.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digital.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digital.getOrDefault("orientation", "") as String,
                hasColourContent = digital.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digital["componentColor"])
            )
        } ?: EnrichedDigitalMetadata()

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = record.getOrDefault("type", "NoDocumentTypeDefined") as String,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            recordId = id,
            institution = Meta.extractInstitution(record),
            recordSet = Meta.extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            // The claim flag only depends on the presence of the property, not on its value.
            memoriavClaim = record["P60451"] != null,
            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),
            // All subject labels are merged into a single container.
            keywords = subjects.flatMap { Extract.languageContainer("hasSubject", it[KEYS.prefLabel]) }.let {
                if (it.isEmpty()) {
                    LanguageContainer.EMPTY
                } else {
                    it.reduce { acc, languageContainer -> acc.merge(languageContainer) }
                }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),
            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),
            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),
            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,
            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = locator,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },
            digital = digitalObjectValues,
            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            // NOTE(review): indexing with [0] assumes the suggest builder always returns at
            // least one element — confirm against SuggestContainerBuilder.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /** Returns the first entity typed as a rico `Instantiation` whose `"type"` equals [type], or `null`. */
    private fun findInstantiation(input: Map<String, JsonObject>, type: String): JsonObject? =
        input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == type }
}