DocumentsSearchDocBuilder.kt 15.4 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

Jonas Waeber's avatar
Jonas Waeber committed
21
import ch.memobase.rdf.EBUCORE
22
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.KEYS
40
import org.memobase.helpers.TranslationMappers
41
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
42
import org.memobase.model.EnrichedDigitalMetadata
43
import org.memobase.model.LanguageContainer
44
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
45

46
/**
 * Builds a [DocumentsSearchDoc] from the JSON entities that belong to one record.
 *
 * @property translationMappers used to translate access terms, the document type and
 * reuse statements into their search-index representation.
 * @property mediaUrl prefix that is prepended to the main identifier of the digital
 * object to form the locator.
 */
class DocumentsSearchDocBuilder(
    private val translationMappers: TranslationMappers,
    private val mediaUrl: String
) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms all entities of a record into a single search document.
     *
     * @param key the record id; it is stored as `recordId` and used in log/context messages.
     * @param input all entities of the message keyed by their id. The record itself must be
     * stored under the key `"record"`.
     * @return the assembled [DocumentsSearchDoc].
     * @throws InvalidInputException if [input] contains no `"record"` entry.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = findInstantiation(input, "digitalObject")
        val physicalObject = findInstantiation(input, "physicalObject")

        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name, input)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name, input)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel, input)

        // Created once; every input entity is offered to each builder in this order.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder,
            formats,
            genres,
            languages
        )

        for ((entityId, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(key, entity)
                }
            }
            // Collected once per entity. This check used to sit inside the builder loop,
            // which added the same identifier entity once for every builder.
            if (digitalIdentifierReferences.contains(entityId)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name")
            .flatMap { it.toList() }.map { translationMappers.getAccessTerm(it) }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }

        // An empty string is used when no main identifier is available for the digital object.
        val locator = try {
            val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
            if (value == null) "" else "$mediaUrl$value"
        } catch (ex: NoSuchElementException) {
            ""
        }

        val addLocator = Filter.checkLocator(digitalObject, accessDigital)

        val digitalObjectValues = if (digitalObject != null) {
            val width = digitalObject.getOrDefault("width", "") as String
            val height = digitalObject.getOrDefault("height", "") as String
            EnrichedDigitalMetadata(
                hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
                isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
                hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
                height = height,
                width = width,
                aspectRatio = AspectRatio.asFraction(width, height),
                mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
                orientation = digitalObject.getOrDefault("orientation", "") as String,
                hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
                componentColor = Extract.listOfStrings(digitalObject["componentColor"])
            )
        } else {
            EnrichedDigitalMetadata()
        }

        // Records without a type are mapped as "Andere".
        val type = translationMappers.getDocumentType((record[KEYS.ricoType] as String?) ?: "Andere")

        return DocumentsSearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = type,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $key")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract (record id: $key)", record["abstract"]),
            recordId = key,
            institution = extractInstitution(record),
            recordSet = extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote (record id: $key)", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent (record id: $key)", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation (record id: $key)", record["relation"]),
            source = Extract.languageContainer("source (record id: $key)", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            conditionsOfUse = Extract.languageContainer(
                "conditionsOfUse (record id: $key)",
                record[KEYS.conditionsOfUse]
            ),
            memoriavClaim = record[KEYS.sponsoredBy] != null,

            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),

            // The labels of all subjects are merged into one container.
            keywords = subjects.flatMap {
                Extract.languageContainer(
                    "hasSubject (record id: $key)",
                    it[KEYS.prefLabel]
                )
            }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer ->
                        acc.merge(languageContainer)
                    }
            },
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessDigital + accessPhysical,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get(KEYS.color)),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = if (addLocator) locator else null,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { translationMappers.getReuseStatement(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Returns the first entity in [input] whose `@type` is the rico `Instantiation` type and
     * whose `type` property equals [instantiationType], or `null` if there is none.
     */
    private fun findInstantiation(input: Map<String, JsonObject>, instantiationType: String): JsonObject? =
        input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == instantiationType }
}