DocumentsSearchDocBuilder.kt 16.5 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
22
23
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
24
25
26
27
28
29
30
31
32
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
33
import org.memobase.helpers.ElasticSearchWrapper
Jonas Waeber's avatar
Jonas Waeber committed
34
35
36
37
38
39
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.KEYS
40
import org.memobase.helpers.TranslationMappers
41
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
42
import org.memobase.model.EnrichedDigitalMetadata
43
import org.memobase.model.FacetContainer
44
import org.memobase.model.LanguageContainer
45
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
46

47
/**
 * Builds a [DocumentsSearchDoc] out of the JSON entities delivered for a single record.
 *
 * @param translationMappers translates access terms, document types and reuse statements.
 * @param elasticSearchWrapper resolves the display names of institutions and record sets.
 * @param mediaUrl prefix that is prepended to the main identifier of the digital object
 * to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of one message into a search document.
     *
     * @param key the record id; stored as `recordId` and used in log and context messages.
     * @param input all entities of the message by their key. The entry `"record"` is mandatory;
     * the digital and physical instantiations are optional and looked up by their type.
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `"record"` entry.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }

        // Identifiers of the entities the record refers to, per property.
        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel, input)

        // The list of builders does not depend on the entity, so it is created once up front.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityKey, entity) in input) {
            // Offer every entity to every builder; a builder only takes what passes its own filter.
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
            // Collected once per entity. This check must stay outside of the builder loop,
            // otherwise the same entity is added once for each builder.
            if (digitalIdentifierReferences.contains(entityKey)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Media URL followed by the main identifier of the digital object; empty if there is none.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "${mediaUrl}${it}" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        // The explicit null check replaces a non-null assertion: without a digital object
        // there is no media location to report.
        val mediaLocation = if (addLocator && digitalObject != null) {
            if (Filter.checkSftpPrefix(digitalObject)) "local" else "remote"
        } else {
            null
        }

        // NOTE(review): the values below are cast to String unchecked, a value of any other
        // type fails the cast. Confirm that upstream only delivers strings for these fields.
        val digitalObjectValues = if (digitalObject != null) {
            val width = digitalObject.getOrDefault("width", "") as String
            val height = digitalObject.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digitalObject.getOrDefault("orientation", "") as String,
                hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digitalObject["componentColor"])
            )
        } else {
            EnrichedDigitalMetadata()
        }

        // Records without a type are mapped with the document type "Andere".
        val type = translationMappers.getDocumentType((record[KEYS.ricoType] as String?) ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[KEYS.conditionsOfUse]
            ),
            memoriavClaim = record[KEYS.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // The labels of all subjects are merged into a single container.
            keywords = subjects.flatMap { subject ->
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    subject[KEYS.prefLabel]
                )
            }.let { containers ->
                if (containers.isEmpty())
                    LanguageContainer.EMPTY
                else
                    containers.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(KEYS.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}