/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import ch.memobase.rdf.NS
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.Constants
import org.memobase.helpers.JsonUtility
import org.memobase.helpers.TranslationMappers
import org.memobase.model.DocumentsSearchDoc
import org.memobase.model.EnrichedDigitalMetadata
import org.memobase.model.FacetContainer
import org.memobase.model.LanguageContainer
import org.memobase.model.Schema

/**
 * Builds the [DocumentsSearchDoc] of a single record from the JSON entities contained in one message.
 *
 * @param translationMappers resolves access terms, document types and reuse statements.
 * @param elasticSearchWrapper looks up institution names, record set names and the extra
 * institutions attached to a record set.
 * @param mediaUrl prefix that is prepended to the main identifier of the digital object to form
 * the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val elasticSearchWrapper: ElasticSearchWrapper,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of one message into a search document.
     *
     * @param key identifier of the record; it becomes the `recordId` of the result and is handed
     * to every field builder.
     * @param input all entities of the message. The keys are matched against the identifiers
     * referenced by the digital object's `identifiedBy` property, so they are presumably the
     * entity ids (verify against the caller).
     * @return the assembled [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no entry under [JsonUtility.recordTag].
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input[JsonUtility.recordTag] ?: throw InvalidInputException("No record defined in the message.")

        // Both instantiations share the same @type and are told apart by their "type" property.
        val instantiationType = NS.rico + "Instantiation"
        fun findInstantiation(kind: String): JsonObject? =
            input.values.firstOrNull { it["@type"] == instantiationType && it["type"] == kind }

        val digitalObject = findInstantiation("digitalObject")
        val physicalObject = findInstantiation("physicalObject")

        val keywordIds = Extract.identifiers(record[Constants.hasSubject])
        val genreIds = Extract.identifiers(record[Constants.hasGenre])
        val publishedByIds = Extract.identifiers(record[Constants.publishedBy])
        val producerIds = Extract.identifiers(record[Constants.producer])
        val spatialIds = Extract.identifiers(record[Constants.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[Constants.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[Constants.created])
        val dateIssuedIds = Extract.identifiers(record[Constants.issued])
        val temporalIds = Extract.identifiers(record[Constants.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, Constants.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, Constants.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, Constants.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), Constants.Person, Constants.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, Constants.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, Constants.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, Constants.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), Constants.CorporateBody, Constants.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, Constants.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, Constants.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, Constants.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), Constants.Agent, Constants.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, Constants.Place, Constants.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.CarrierType, Constants.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + Constants.Language, Constants.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + Constants.Concept, Constants.prefLabel, input)

        // Built once: the set of builders does not depend on the entity being visited.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
            // Collected once per entity; doing this per builder would add the same entity
            // to the list once for every builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // An absent main identifier yields an empty locator rather than a bare media url.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, Constants.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        // The explicit null guard replaces a `!!` that relied on checkLocator rejecting null.
        val mediaLocation = when {
            !addLocator || digitalObject == null -> null
            Filter.checkSftpPrefix(digitalObject) -> "local"
            else -> "remote"
        }

        // NOTE(review): the `as String` casts throw if a property holds a non-string value;
        // kept as in the original so malformed input is not silently masked.
        val digitalObjectValues = digitalObject?.let { digital ->
            val width = digital.getOrDefault("width", "") as String
            val height = digital.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digital.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digital.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digital.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digital.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digital.getOrDefault("orientation", "") as String,
                hasColourContent = digital.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digital["componentColor"])
            )
        } ?: EnrichedDigitalMetadata()

        // Records without a type are mapped through the "Andere" document type.
        val type = translationMappers.getDocumentType((record[Constants.ricoType] as String?) ?: "Andere")

        val recordSetId = extractRecordSet(record)

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key.")
                "NoSourceIdFound"
            },
            oldMemobaseId = try {
                Extract.extractIdValue(recordIdentifiers, Constants.IdentifierType.oldMemobase) ?: ""
            } catch (ex: NoSuchElementException) {
                log.warn("No old memobase id found for record $key.")
                ""
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record).map { value -> elasticSearchWrapper.getInstitutionName(value) },
            recordSet = FacetContainer(
                elasticSearchWrapper.getRecordSetName(recordSetId),
                null,
                if (recordSetId != "") listOf(recordSetId) else emptyList()
            ),
            descriptiveNote = Extract.languageContainer(
                "descriptiveNote (record id: $key)",
                record["descriptiveNote"]
            ),
            scopeAndContent = Extract.languageContainer(
                "scopeAndContent (record id: $key)",
                record["scopeAndContent"]
            ),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[Constants.conditionsOfUse]
            ),
            memoriavClaim = record[Constants.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // Merge the label containers of all subjects into a single container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[Constants.prefLabel]
                )
            }.let { containers ->
                if (containers.isEmpty())
                    LanguageContainer.EMPTY
                else
                    containers.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(Constants.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer(
                "descriptiveNote",
                physicalObject?.get("descriptiveNote")
            ),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs")
                .flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            accessInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "access"),
            originalInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "original"),
            masterInstitution = elasticSearchWrapper.getExtraInstitutionsFromRecordSet(recordSetId, "master"),

            published = (record[Constants.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}