DocumentsSearchDocBuilder.kt 17.2 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
22
import ch.memobase.rdf.RICO
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
25
26
27
28
29
30
31
32
33
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
34
import org.memobase.helpers.ElasticSearchWrapper
Jonas Waeber's avatar
Jonas Waeber committed
35
36
37
38
39
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
Jonas Waeber's avatar
Jonas Waeber committed
40
import org.memobase.helpers.Constants
41
import org.memobase.helpers.JsonUtility
42
import org.memobase.helpers.TranslationMappers
43
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
44
import org.memobase.model.EnrichedDigitalMetadata
45
import org.memobase.model.FacetContainer
46
import org.memobase.model.LanguageContainer
47
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
48

49
/**
 * Assembles a [DocumentsSearchDoc] from the JSON entities that belong to one record.
 *
 * @property translationMappers used to translate access terms, reuse statements and the document type.
 * @property elasticSearchWrapper used to look up institution and record set names.
 * @property mediaUrl prefix that is prepended to the main digital identifier to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of a single record into a search document.
     *
     * @param key identifier of the record; used as `recordId` and in log / error labels.
     * @param input all entities of the record, keyed by their identifier. Must contain an entry
     * under [JsonUtility.recordTag].
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] contains no record entity.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input[JsonUtility.recordTag] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == RICO.Types.Instantiation.digitalObject
        }
        val physicalObject = input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == RICO.Types.Instantiation.physicalObject
        }

        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once: the set of builders does not change while iterating over the entities.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        // Offer every entity to every builder; a builder only consumes the entities its filter accepts.
        for (entity in input.values) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
        }

        // Entities referenced by the digital object's "identifiedBy" property. Collected once per
        // entity (previously this ran inside the builder loop and added one copy per builder).
        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities =
            input.filterKeys { digitalIdentifierReferences.contains(it) }.values.toList()

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }
            .map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }
            .map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Locator is the media URL prefix plus the main digital identifier; empty when there is none.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, Constants.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        val mediaLocation = if (addLocator) {
            // NOTE(review): `!!` relies on checkLocator returning false for a missing digital
            // object -- confirm against Filter.checkLocator.
            if (Filter.checkSftpPrefix(digitalObject!!)) "local" else "remote"
        } else {
            null
        }

        val digitalObjectValues = if (digitalObject != null) {
            val width = digitalObject.getOrDefault("width", "") as String
            val height = digitalObject.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digitalObject.getOrDefault("orientation", "") as String,
                hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digitalObject["componentColor"])
            )
        } else {
            EnrichedDigitalMetadata()
        }

        // Records without a type are mapped through the "Andere" document type.
        val type = translationMappers.getDocumentType((record[Constants.ricoType] as String?) ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // Merge the preferred labels of all subjects into a single container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "access"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "original"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "master"),

            published = (record[Constants.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}