DocumentsSearchDocBuilder.kt 17.3 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
22
import ch.memobase.rdf.RICO
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
25
26
27
28
29
30
31
32
33
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
Jonas Waeber's avatar
Jonas Waeber committed
34
import org.memobase.helpers.Constants
35
import org.memobase.helpers.ElasticSearchWrapper
Jonas Waeber's avatar
Jonas Waeber committed
36
37
38
39
40
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
41
import org.memobase.helpers.JsonUtility
42
import org.memobase.helpers.TranslationMappers
43
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
44
import org.memobase.model.EnrichedDigitalMetadata
45
import org.memobase.model.FacetContainer
46
import org.memobase.model.LanguageContainer
47
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
48

49
/**
 * Builds the [DocumentsSearchDoc] search document for a single record from the JSON entities
 * that belong to it (the record itself, its instantiations and all referenced entities).
 *
 * @param translationMappers translates access terms, reuse statements and document types.
 * @param elasticSearchWrapper looks up institution names, record set names and the extra
 * institutions attached to a record set.
 * @param mediaUrl prefix that is prepended to the main digital identifier to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Transforms all entities of one record into a search document.
     *
     * @param key identifier of the record; used as `recordId` and in log/error messages.
     * @param input all entities of the record keyed by their identifier. The record itself is
     * expected under [JsonUtility.recordTag].
     * @return the assembled [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] contains no entry for [JsonUtility.recordTag].
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input[JsonUtility.recordTag]
            ?: throw InvalidInputException("No record defined in the message $key.")
        val digitalObject = input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == RICO.Types.Instantiation.digitalObject
        }
        val physicalObject = input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == RICO.Types.Instantiation.physicalObject
        }

        // Identifiers of the entities referenced by the record, grouped by property.
        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder =
            AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once: the same set of builders inspects every entity of the record.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
            // Collected once per entity. This check used to live inside the builder loop and
            // therefore added every referenced identifier entity once per builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Media URL + main digital identifier; empty when there is no such identifier.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, Constants.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        // The explicit null check lets the compiler smart-cast instead of relying on `!!`.
        val mediaLocation = if (addLocator && digitalObject != null) {
            if (Filter.checkSftpPrefix(digitalObject)) "local" else "remote"
        } else {
            null
        }

        val digitalObjectValues = digitalMetadata(digitalObject)

        // Records without a type are mapped through the "Andere" document type.
        val type = translationMappers.getDocumentType((record[Constants.ricoType] as String?) ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // All subject labels are merged into a single container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let { containers ->
                if (containers.isEmpty()) {
                    LanguageContainer.EMPTY
                } else {
                    containers.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
                }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "accessInstitution"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "originalInstitution"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "masterInstitution"),

            published = (record[Constants.isPublished] as Boolean?) ?: false,
            // NOTE(review): assumes build() always yields at least one element - confirm.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Collects the technical metadata of the digital instantiation, or returns a default
     * [EnrichedDigitalMetadata] when the record has no digital object.
     */
    private fun digitalMetadata(digitalObject: JsonObject?): EnrichedDigitalMetadata {
        if (digitalObject == null) {
            return EnrichedDigitalMetadata()
        }
        val width = digitalObject.stringField("width")
        val height = digitalObject.stringField("height")
        return EnrichedDigitalMetadata(
            hasFormat = digitalObject.stringField("hasFormat"),
            isDistributedOn = digitalObject.stringField("isDistributedOn"),
            hasMimeType = digitalObject.stringField("hasMimeType"),
            height = height,
            width = width,
            aspectRatio = AspectRatio.asFraction(width, height),
            mediaResourceDescription = digitalObject.stringField("mediaResourceDescription"),
            orientation = digitalObject.stringField("orientation"),
            hasColourContent = digitalObject.stringField("P60558"),
            componentColor = Extract.listOfStrings(digitalObject["componentColor"])
        )
    }

    /**
     * Reads [name] as a string, falling back to the empty string when the property is absent.
     * A present value that is not a string fails the cast, as it did before this was extracted.
     */
    private fun JsonObject.stringField(name: String): String = getOrDefault(name, "") as String
}