// DocumentsSearchDocBuilder.kt
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.*
import org.memobase.helpers.*
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema
import org.memobase.rdf.NS

/**
 * Assembles a [DocumentsSearchDoc] from the JSON entities that belong to one record.
 *
 * @property mediaUrl prefix that is prepended to the main identifier of the digital object
 * to form the `locator` field of the search document.
 */
class DocumentsSearchDocBuilder(private val mediaUrl: String) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of one message into a search document.
     *
     * @param input the entities of the message. The entry stored under the key `record` is the
     * record itself; the keys of all entries are additionally matched against the identifiers
     * referenced by the digital object's `identifiedBy` property.
     * @return the assembled [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `record` entry or the record has no
     * string-valued `@id`.
     */
    fun transform(input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = findInstantiation(input, "digitalObject")
        val physicalObject = findInstantiation(input, "physicalObject")

        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)

        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel)

        // Fail with the same exception type as a missing record instead of an opaque cast error.
        val id = record["@id"] as? String
            ?: throw InvalidInputException("No @id defined for the record.")

        // The set of builders does not depend on the entity, so the list is created only once.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((key, entity) in input) {
            // Every builder decides for itself whether the entity is relevant to it.
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(id, entity)
                }
            }

            // Checked once per entity (not once per builder) so that an identifier entity
            // is never collected more than once.
            if (key in digitalIdentifierReferences) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // The locator stays empty when the digital object has no main identifier.
        val locator = try {
            val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
            if (value == null) "" else "${mediaUrl}${value}"
        } catch (ex: NoSuchElementException) {
            ""
        }

        val digitalObjectValues = digitalMetadata(digitalObject)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = record.getOrDefault("type", "NoDocumentTypeDefined") as String,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            recordId = id,
            institution = Meta.extractInstitution(record),
            recordSet = Meta.extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            memoriavClaim = record["P60451"] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // Merge the label containers of all subjects into one; EMPTY when there are none.
            keywords = subjects.flatMap { Extract.languageContainer("hasSubject", it[KEYS.prefLabel]) }.let {
                if (it.isEmpty()) {
                    LanguageContainer.EMPTY
                } else {
                    it.reduce { acc, languageContainer -> acc.merge(languageContainer) }
                }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = locator,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Returns the first entity in [input] whose `@type` is the rico `Instantiation` type and
     * whose `type` equals [instantiationType], or `null` when there is none.
     */
    private fun findInstantiation(input: Map<String, JsonObject>, instantiationType: String): JsonObject? =
        input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == instantiationType }

    /**
     * Copies the technical properties of [digitalObject] into an [EnrichedDigitalMetadata].
     * Properties that are absent default to an empty string; a missing digital object yields
     * a default-constructed [EnrichedDigitalMetadata].
     */
    private fun digitalMetadata(digitalObject: JsonObject?): EnrichedDigitalMetadata {
        if (digitalObject == null) {
            return EnrichedDigitalMetadata()
        }
        // Width and height are read first because the aspect ratio is derived from both.
        val width = digitalObject.getOrDefault("width", "") as String
        val height = digitalObject.getOrDefault("height", "") as String
        return EnrichedDigitalMetadata(
            hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
            isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
            hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
            height = height,
            width = width,
            aspectRatio = AspectRatio.asFraction(width, height),
            mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
            orientation = digitalObject.getOrDefault("orientation", "") as String,
            hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
            componentColor = Extract.listOfStrings(digitalObject["componentColor"])
        )
    }
}