/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import ch.memobase.rdf.NS
import ch.memobase.rdf.RICO
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.Constants
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.JsonUtility
import org.memobase.helpers.TranslationMappers
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.FacetContainer
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema

/**
 * Assembles a [DocumentsSearchDoc] from the JSON entities that belong to one record.
 *
 * @param translationMappers used to translate access terms, document types and reuse statements.
 * @param elasticSearchWrapper used to look up institution names, record set names and the
 * extra (access / original / master) institutions of a record set.
 * @param mediaUrl prefix that is prepended to the main digital identifier to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of a single record into a search document.
     *
     * @param key id of the record; stored as `recordId` and used in log / extraction messages.
     * @param input all entities of the record keyed by their id. The record itself is expected
     * under [JsonUtility.recordTag].
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException when [input] contains no entry for [JsonUtility.recordTag].
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input[JsonUtility.recordTag] ?: throw InvalidInputException("No record defined in the message.")

        // The first instantiation of each kind wins; either of them may be absent.
        val digitalObject = input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == RICO.Types.Instantiation.digitalObject
        }
        val physicalObject = input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == RICO.Types.Instantiation.physicalObject
        }

        // Ids of the entities referenced by the record, grouped by the referencing property.
        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder =
            AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once; every input entity is offered to every builder in this order.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,

            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    // The record key (not the entity id) is handed to the builders.
                    builder.append(key, entity)
                }
            }

            // Collected once per entity. Previously this check lived inside the builder loop,
            // which appended the same identifier entity once for every builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Empty string when there is no main identifier on the digital object.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, Constants.IdentifierType.main)
                ?.let { "${mediaUrl}${it}" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        // Without a digital object there is nothing to classify, so the location stays null
        // (the former `!!` would have thrown a NullPointerException in that case).
        val mediaLocation = if (addLocator && digitalObject != null) {
            if (Filter.checkSftpPrefix(digitalObject)) "local" else "remote"
        } else {
            null
        }

        val digitalObjectValues = digitalObject?.let { digital ->
            val width = digital.stringOrEmpty("width")
            val height = digital.stringOrEmpty("height")
            EnrichedDigitalMetadata(
                hasFormat = digital.stringOrEmpty("hasFormat"),
                isDistributedOn = digital.stringOrEmpty("isDistributedOn"),
                hasMimeType = digital.stringOrEmpty("hasMimeType"),
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digital.stringOrEmpty("mediaResourceDescription"),
                orientation = digital.stringOrEmpty("orientation"),
                hasColourContent = digital.stringOrEmpty("P60558"),
                componentColor = Extract.listOfStrings(digital["componentColor"])
            )
        } ?: EnrichedDigitalMetadata()

        // Records without a type are mapped via the "Andere" document type.
        val type = translationMappers.getDocumentType((record[Constants.ricoType] as String?) ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // All subject labels are merged into a single container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "access"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "original"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "master"),

            published = (record[Constants.isPublished] as Boolean?) ?: false,
            // NOTE(review): assumes the suggest builder always yields at least one element - confirm.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /** Reads [property] as a string, falling back to the empty string when the key is absent. */
    private fun JsonObject.stringOrEmpty(property: String): String = getOrDefault(property, "") as String
}