/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package org.memobase

import ch.memobase.rdf.NS
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.*
import org.memobase.helpers.*
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema

/**
 * Builds a [DocumentsSearchDoc] from the entities of a single record message.
 *
 * @property mediaUrl prefix that is prepended to the main identifier of the digital object to form the locator.
 */
class DocumentsSearchDocBuilder(private val mediaUrl: String) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities contained in [input] into a search document.
     *
     * The entry stored under the key `"record"` is the root entity. All other entries are
     * offered to a fixed set of field builders, each of which decides via its `filter`
     * whether it consumes the entity.
     *
     * @param input entities of the message, keyed by string; must contain a `"record"` entry.
     * @return the assembled [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `"record"` entry.
     */
    fun transform(input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = findInstantiation(input, "digitalObject")
        val physicalObject = findInstantiation(input, "physicalObject")

        // Identifier lists referenced by the record; they tell the builders which entities belong to which field.
        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()

        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel)

        // Fails with a cast/null error if the record has no string "@id".
        val id = record["@id"] as String

        // Built once: the set of builders does not depend on the entity being visited.
        val fieldBuilders = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((key, entity) in input) {
            for (builder: IFieldBuilder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(id, entity)
                }
            }
            // Collected once per entity (not once per builder) so the list holds no duplicates.
            if (digitalIdentifierReferences.contains(key)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }
        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs")
            .flatMap { it.toList() }

        // Empty locator when the digital object has no main identifier.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val digitalObjectValues = buildDigitalMetadata(digitalObject)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = record.getOrDefault("type", "NoDocumentTypeDefined") as String,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            recordId = id,
            institution = Meta.extractInstitution(record),
            recordSet = Meta.extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            // The claim flag is set purely by the presence of the "P60451" property.
            memoriavClaim = record["P60451"] != null,
            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),
            // Merge the preferred labels of all subjects into a single container.
            keywords = subjects
                .flatMap { Extract.languageContainer("hasSubject", it[KEYS.prefLabel]) }
                .let { containers ->
                    if (containers.isEmpty()) {
                        LanguageContainer.EMPTY
                    } else {
                        containers.reduce { acc, languageContainer -> acc.merge(languageContainer) }
                    }
                },

            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = locator,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },
            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            // NOTE(review): indexes the first element unconditionally; assumes build() never returns an empty list.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /** Returns the first rico Instantiation entity in [input] whose `"type"` equals [type], or null if none. */
    private fun findInstantiation(input: Map<String, JsonObject>, type: String): JsonObject? =
        input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == type }

    /** Copies the technical properties of [digitalObject] into metadata; returns default metadata when it is null. */
    private fun buildDigitalMetadata(digitalObject: JsonObject?): EnrichedDigitalMetadata {
        if (digitalObject == null) return EnrichedDigitalMetadata()

        val width = digitalObject.getOrDefault("width", "") as String
        val height = digitalObject.getOrDefault("height", "") as String
        return EnrichedDigitalMetadata(
            hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
            isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
            hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
            height = height,
            width = width,
            aspectRatio = AspectRatio.asFraction(width, height),
            mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
            orientation = digitalObject.getOrDefault("orientation", "") as String,
            // The colour content is read from the "P60558" property.
            hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
            componentColor = Extract.listOfStrings(digitalObject["componentColor"])
        )
    }
}