/*
 * search-doc-service
 * Copyright (C) 2020-2021 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package ch.memobase
Jonas Waeber's avatar
Jonas Waeber committed
19

20
import ch.memobase.builders.AgentContainerBuilder
Jonas Waeber's avatar
Jonas Waeber committed
21
import ch.memobase.builders.DateSearchFieldBuilder
22
23
24
25
26
27
28
29
import ch.memobase.builders.EnrichedFacetContainerBuilder
import ch.memobase.builders.FacettedContainerBuilder
import ch.memobase.builders.IFieldBuilder
import ch.memobase.builders.PersonFacetBuilder
import ch.memobase.builders.PlaceFacetBuilder
import ch.memobase.builders.SuggestContainerBuilder
import ch.memobase.helpers.AspectRatio
import ch.memobase.helpers.Constants
30
import ch.memobase.helpers.Constants.IdentifierType
31
32
33
34
35
36
37
38
39
40
41
42
import ch.memobase.helpers.ElasticSearchWrapper
import ch.memobase.helpers.Extract
import ch.memobase.helpers.FacetBuildHelpers
import ch.memobase.helpers.Filter
import ch.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import ch.memobase.helpers.JsonUtility
import ch.memobase.helpers.TranslationMappers
import ch.memobase.model.DocumentsSearchDoc
import ch.memobase.model.EnrichedDigitalMetadata
import ch.memobase.model.FacetContainer
import ch.memobase.model.LanguageContainer
import ch.memobase.model.Schema
43
44
45
46
import ch.memobase.rdf.NS
import ch.memobase.rdf.RICO
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
47

48
/**
 * Builds the [DocumentsSearchDoc] for a single record out of the JSON entities
 * that were delivered together with that record.
 *
 * @param translationMappers translates access terms, reuse statements and document types.
 * @param elasticSearchWrapper looks up record set and institution names for a record set id.
 * @param mediaUrl prefix that is put in front of the main digital identifier to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Transforms the entities of one record into a search document.
     *
     * @param key the record id; it is used in log/error messages, passed to every field
     *   builder and stored as `recordId` of the result.
     * @param input all entities of the message, keyed by their identifier. The entry stored
     *   under [JsonUtility.recordTag] is treated as the record itself.
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no entry for [JsonUtility.recordTag].
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record =
            input[JsonUtility.recordTag] ?: throw InvalidInputException("No record defined in the message $key.")

        // The first instantiation of each kind wins; both may be absent.
        val digitalObject =
            input.values.firstOrNull {
                it[Constants.atType] == RICO.Instantiation.uri &&
                    it[RICO.type.localName] == RICO.Types.Instantiation.digitalObject
            }
        val physicalObject =
            input.values.firstOrNull {
                it[Constants.atType] == RICO.Instantiation.uri &&
                    it[RICO.type.localName] == RICO.Types.Instantiation.physicalObject
            }

        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder =
            AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)

        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateSearchFieldBuilder(dateCreatedIds, "created")
        val dateIssuedBuilder = DateSearchFieldBuilder(dateIssuedIds, "issued")
        val temporalBuilder = DateSearchFieldBuilder(temporalIds, "temporal")

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once: the set of builders does not depend on the entity being visited.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,

            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            // Offer the entity to every builder; each builder decides itself whether it is relevant.
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }

            // Collect the identifier entities of the digital object exactly once per entity.
            // (This check used to sit inside the builder loop and added the same entity once per builder.)
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // The locator is the media URL prefix followed by the main identifier of the digital object.
        // A missing identifier results in an empty locator instead of an error.
        val locator = try {
            val value = Extract.extractIdValue(digitalIdentifierEntities, IdentifierType.main)
            if (value == null) "" else "${mediaUrl}${value}"
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        // The explicit null check replaces a former `!!`: without a digital object there is
        // no media location to report.
        val mediaLocation = if (addLocator && digitalObject != null) {
            if (Filter.checkSftpPrefix(digitalObject)) "local" else "remote"
        } else {
            null
        }

        val digitalObjectValues = if (digitalObject == null) {
            EnrichedDigitalMetadata()
        } else {
            val width = digitalObject.getOrDefault("width", "") as String
            val height = digitalObject.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digitalObject.getOrDefault("orientation", "") as String,
                hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digitalObject["componentColor"])
            )
        }

        // Records without an explicit type are mapped with the fallback type "Andere".
        val rawType = record[Constants.ricoType]
        val type = if (rawType == null) {
            translationMappers.getDocumentType("Andere")
        } else {
            translationMappers.getDocumentType(rawType as String)
        }

        val recordSetId = extractRecordSet(record)

        // Build each date field once and reuse the result for the search field and the facet field.
        val dateCreatedResult = dateCreatedBuilder.build()
        val dateIssuedResult = dateIssuedBuilder.build()

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = elasticSearchWrapper.getInstitutionNamesFromRecordSet(recordSetId),
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build().first,
            dateCreated = dateCreatedResult.first,
            dateIssued = dateIssuedResult.first,
            dateFacetField = dateCreatedResult.second + dateIssuedResult.second,
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // Merge the labels of all subjects into a single container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(
                recordSetId,
                "accessInstitution"
            ),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(
                recordSetId,
                "originalInstitution"
            ),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(
                recordSetId,
                "masterInstitution"
            ),

            published = (record[Constants.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}