DocumentsSearchDocBuilder.kt 16.8 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
22 23
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
24 25 26 27 28 29 30 31 32
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
33
import org.memobase.helpers.ElasticSearchWrapper
Jonas Waeber's avatar
Jonas Waeber committed
34 35 36 37 38 39
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.KEYS
40
import org.memobase.helpers.TranslationMappers
41
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
42
import org.memobase.model.EnrichedDigitalMetadata
43
import org.memobase.model.FacetContainer
44
import org.memobase.model.LanguageContainer
45
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
46

47
/**
 * Builds the [DocumentsSearchDoc] for a single record out of the JSON entities
 * that were delivered together with it.
 *
 * @property translationMappers used to translate access terms, reuse statements and document types.
 * @property elasticSearchWrapper used to look up institution and record set names.
 * @property mediaUrl prefix that is put in front of the main digital identifier to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms all entities belonging to one record into a search document.
     *
     * @param key identifier of the record; it is stored as `recordId` and used in log and error labels.
     * @param input all entities of the message keyed by their identifier. Must contain an entry
     * named `"record"`.
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `"record"` entry.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {

        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }

        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)

        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel, input)

        // The set of builders does not depend on the entity being visited, so it is created once
        // instead of once per entity.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,

            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }

            // Collected once per entity. Doing this inside the builder loop would add the same
            // identifier entity once for every builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Media URL followed by the main identifier of the digital object; empty when there is none.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "${mediaUrl}${it}" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        val mediaLocation = if (addLocator) {
            // NOTE(review): relies on checkLocator returning false for a missing digital object — confirm.
            if (Filter.checkSftpPrefix(digitalObject!!)) {
                "local"
            } else {
                "remote"
            }
        } else {
            null
        }

        // Technical metadata of the digital object; defaults are used when no digital object exists.
        val digitalObjectValues = digitalObject?.let { digital ->
            val width = digital.getOrDefault("width", "") as String
            val height = digital.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digital.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digital.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digital.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digital.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digital.getOrDefault("orientation", "") as String,
                hasColourContent = digital.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digital["componentColor"])
            )
        } ?: EnrichedDigitalMetadata()

        // Records without a type are mapped as "Andere".
        val type = translationMappers.getDocumentType((record[KEYS.ricoType] as String?) ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[KEYS.conditionsOfUse]
            ),
            memoriavClaim = record[KEYS.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // Labels of all subjects merged into one container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[KEYS.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(KEYS.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "access"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "original"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "master"),

            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}