DocumentsSearchDocBuilder.kt 17.3 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

19
package ch.memobase
Jonas Waeber's avatar
Jonas Waeber committed
20

21
import ch.memobase.rdf.NS
22
import ch.memobase.rdf.RICO
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import ch.memobase.builders.AgentContainerBuilder
import ch.memobase.builders.DateContainerBuilder
import ch.memobase.builders.EnrichedFacetContainerBuilder
import ch.memobase.builders.FacettedContainerBuilder
import ch.memobase.builders.IFieldBuilder
import ch.memobase.builders.PersonFacetBuilder
import ch.memobase.builders.PlaceFacetBuilder
import ch.memobase.builders.SuggestContainerBuilder
import ch.memobase.helpers.AspectRatio
import ch.memobase.helpers.Constants
import ch.memobase.helpers.ElasticSearchWrapper
import ch.memobase.helpers.Extract
import ch.memobase.helpers.FacetBuildHelpers
import ch.memobase.helpers.Filter
import ch.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import ch.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import ch.memobase.helpers.JsonUtility
import ch.memobase.helpers.TranslationMappers
import ch.memobase.model.DocumentsSearchDoc
import ch.memobase.helpers.Constants.IdentifierType
import ch.memobase.model.EnrichedDigitalMetadata
import ch.memobase.model.FacetContainer
import ch.memobase.model.LanguageContainer
import ch.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
49

50
/**
 * Assembles the [DocumentsSearchDoc] for one record out of the JSON entities
 * that were delivered together with it.
 *
 * @param translationMappers source of the access term, reuse statement and document type look-ups.
 * @param elasticSearchWrapper source of the record set name and institution look-ups.
 * @param mediaUrl prefix that is prepended to the main identifier of the digital object to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Transforms all entities of a single message into one search document.
     *
     * @param key identifier of the record; used as `recordId`, handed to every field builder
     * and included in log and error messages.
     * @param input all JSON entities of the message. Must contain an entry under
     * [JsonUtility.recordTag]; the remaining keys are presumably entity identifiers
     * (they are matched against the `identifiedBy` references of the digital object).
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no entry for [JsonUtility.recordTag].
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input[JsonUtility.recordTag]
            ?: throw InvalidInputException("No record defined in the message $key.")
        val digitalObject = findInstantiation(input, RICO.Types.Instantiation.digitalObject)
        val physicalObject = findInstantiation(input, RICO.Types.Instantiation.physicalObject)

        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder =
            AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once instead of once per input entry; the order matches the original iteration order.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,

            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
            // Collected once per entity. This check used to sit inside the builder loop,
            // which added every referenced identifier entity once per builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // Media URL prefix + main identifier of the digital object; empty when there is no such identifier.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        // "local" when the sftp prefix check succeeds, "remote" otherwise; null when no locator is added.
        // The explicit null check replaces a `!!` on the digital object.
        val mediaLocation = if (addLocator && digitalObject != null) {
            if (Filter.checkSftpPrefix(digitalObject)) "local" else "remote"
        } else {
            null
        }

        val digitalObjectValues = digitalObject?.let { digital ->
            val width = digital.getOrDefault("width", "") as String
            val height = digital.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digital.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digital.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digital.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digital.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digital.getOrDefault("orientation", "") as String,
                hasColourContent = digital.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digital["componentColor"])
            )
        } ?: EnrichedDigitalMetadata()

        // Records without a type are looked up under "Andere".
        val type = translationMappers.getDocumentType(record[Constants.ricoType] as String? ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = elasticSearchWrapper.getInstitutionNamesFromRecordSet(recordSetId),
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // All subject labels merged into a single container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "accessInstitution"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "originalInstitution"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "masterInstitution"),

            published = (record[Constants.isPublished] as Boolean?) ?: false,
            // NOTE(review): takes the first element; assumes build() never returns an empty list - confirm.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Returns the first entity in [input] that is typed as a RICO instantiation and whose
     * RICO `type` property equals [instantiationType], or `null` if there is none.
     */
    private fun findInstantiation(input: Map<String, JsonObject>, instantiationType: Any?): JsonObject? =
        input.values.firstOrNull {
            it[Constants.atType] == RICO.Instantiation.uri &&
                it[RICO.type.localName] == instantiationType
        }
}