DocumentsSearchDocBuilder.kt 15.3 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
22 23
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.KEYS
39
import org.memobase.helpers.TranslationMappers
40
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
41
import org.memobase.model.EnrichedDigitalMetadata
42
import org.memobase.model.LanguageContainer
43
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
44

45
/**
 * Assembles a [DocumentsSearchDoc] from the JSON entities that make up one record.
 *
 * @param translationMappers supplies the access-term, reuse-statement and document-type lookups.
 * @param mediaUrl prefix that is prepended to the digital object's main identifier to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms all entities of a single record into a search document.
     *
     * @param key the record id; it is stored as `recordId`, handed to every field builder and used in log
     * and extraction context messages.
     * @param input all entities of the record keyed by their id. Must contain an entry named `"record"`.
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `"record"` entry.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val instantiationType = NS.rico + "Instantiation"
        val digitalObject =
            input.values.firstOrNull { it["@type"] == instantiationType && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == instantiationType && it["type"] == "physicalObject" }

        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)

        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel, input)

        // Built once up front; every input entity is offered to each builder in this order.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    // The record key (not the entity id) is what the builders receive.
                    builder.append(key, entity)
                }
            }
            // Collected once per entity; previously this ran inside the builder loop and
            // added the same entity once for every builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Empty string when no main identifier is available for the digital object.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "${mediaUrl}${it}" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)

        val digitalObjectValues = digitalMetadata(digitalObject)

        // Records without a type are classified under the "Andere" document type.
        val type = translationMappers.getDocumentType((record[KEYS.ricoType] ?: "Andere") as String)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record),
            recordSet = extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote (record id: $key)", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent (record id: $key)", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[KEYS.conditionsOfUse]
            ),
            memoriavClaim = record[KEYS.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // All subject labels are merged into a single container; EMPTY when there are none.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[KEYS.prefLabel]
                )
            }.let { containers ->
                if (containers.isEmpty())
                    LanguageContainer.EMPTY
                else
                    containers.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(KEYS.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Reads the technical metadata fields of the digital object.
     *
     * Missing keys default to the empty string; a default-constructed [EnrichedDigitalMetadata]
     * is returned when there is no digital object at all.
     */
    private fun digitalMetadata(digitalObject: JsonObject?): EnrichedDigitalMetadata =
        digitalObject?.let { obj ->
            val width = obj.getOrDefault("width", "") as String
            val height = obj.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = obj.getOrDefault("hasFormat", "") as String,
                isDistributedOn = obj.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = obj.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = obj.getOrDefault("mediaResourceDescription", "") as String,
                orientation = obj.getOrDefault("orientation", "") as String,
                hasColourContent = obj.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(obj["componentColor"])
            )
        } ?: EnrichedDigitalMetadata()
}