DocumentsSearchDocBuilder.kt 15.6 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
22
23
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.KEYS
39
import org.memobase.helpers.TranslationMappers
40
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
41
import org.memobase.model.EnrichedDigitalMetadata
42
import org.memobase.model.LanguageContainer
43
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
44

45
/**
 * Assembles a [DocumentsSearchDoc] for one record out of the JSON entities that belong to it.
 *
 * @property translationMappers translates access terms, document types and reuse statements.
 * @property mediaUrl prefix that is prepended to the main identifier of the digital object to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of a single record into a search document.
     *
     * @param key the record id; it is passed to every field builder and stored as `recordId`.
     * @param input all entities of the record keyed by their identifier. Must contain an entry named `"record"`.
     * @return the populated [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] has no `"record"` entry.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = findInstantiation(input, "digitalObject")
        val physicalObject = findInstantiation(input, "physicalObject")

        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel, input)

        // The set of builders does not depend on the entity being visited, so it is created once.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
            // Collected once per entity. Doing this inside the builder loop would add the same
            // entity once for every builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }
            .map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }
            .map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // An absent main identifier (null result or NoSuchElementException) yields an empty locator.
        val locator = try {
            val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
            if (value == null) "" else "${mediaUrl}${value}"
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject)
        val mediaLocation = when {
            !addLocator -> null
            // NOTE(review): relies on checkLocator returning false for a null digitalObject — confirm.
            Filter.checkSftpPrefix(digitalObject!!) -> "local"
            else -> "remote"
        }

        val digitalObjectValues = buildDigitalMetadata(digitalObject)

        // Records without a type are mapped through the "Andere" document type.
        val type = translationMappers.getDocumentType(record[KEYS.ricoType] as String? ?: "Andere")

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record),
            recordSet = extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote (record id: $key)", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent (record id: $key)", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[KEYS.conditionsOfUse]
            ),
            memoriavClaim = record[KEYS.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // All subject labels are merged into a single container; no subjects gives the empty container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[KEYS.prefLabel]
                )
            }.let { containers ->
                if (containers.isEmpty()) {
                    LanguageContainer.EMPTY
                } else {
                    containers.reduce { acc, languageContainer -> acc.merge(languageContainer) }
                }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(KEYS.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            mediaLocation = mediaLocation,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /** Returns the first rico Instantiation entity whose `type` equals [type], or null if there is none. */
    private fun findInstantiation(input: Map<String, JsonObject>, type: String): JsonObject? =
        input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == type }

    /** Copies the technical fields of the digital object, or returns default metadata when there is none. */
    private fun buildDigitalMetadata(digitalObject: JsonObject?): EnrichedDigitalMetadata {
        if (digitalObject == null) {
            return EnrichedDigitalMetadata()
        }
        // Missing keys fall back to the empty string; present values are expected to be strings.
        fun text(field: String): String = digitalObject.getOrDefault(field, "") as String

        val width = text("width")
        val height = text("height")
        return EnrichedDigitalMetadata(
            hasFormat = text("hasFormat"),
            isDistributedOn = text("isDistributedOn"),
            hasMimeType = text("hasMimeType"),
            height = height,
            width = width,
            aspectRatio = AspectRatio.asFraction(width, height),
            mediaResourceDescription = text("mediaResourceDescription"),
            orientation = text("orientation"),
            hasColourContent = text("P60558"),
            componentColor = Extract.listOfStrings(digitalObject["componentColor"])
        )
    }
}