SearchDocTransform.kt 13.6 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import org.memobase.builders.*
import org.memobase.helpers.*
Jonas Waeber's avatar
Jonas Waeber committed
25
import org.memobase.model.EnrichedDigitalMetadata
Jonas Waeber's avatar
Jonas Waeber committed
26
import org.memobase.model.SearchDoc
27
import org.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
28

29
/**
 * Builds a [SearchDoc] from the entities of a single record message.
 *
 * @property mediaUrl prefix that is prepended to the main identifier value of the digital object
 * to form the `locator` of the search document.
 */
class SearchDocTransform(private val mediaUrl: String) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of one message into a [SearchDoc].
     *
     * The entry stored under the key `"record"` is used as the record. The digital and the physical
     * object are looked up among all values by their `@type` and `type` properties; both are optional.
     *
     * @param input the entities of the message. The key of each entry is compared against the
     * identifier references of the digital object.
     * @return the search document assembled from the record, its instantiations and related entities.
     * @throws InvalidInputException if [input] has no entry for the key `"record"`.
     */
    fun transform(input: Map<String, JsonObject>): SearchDoc {
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
        val digitalObject = findInstantiation(input, "digitalObject")
        val physicalObject = findInstantiation(input, "physicalObject")

        val hasSubjectIds = Extract.identifiers(record[KEYS.hasSubject])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(hasSubjectIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(hasSubjectIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(hasSubjectIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder =
            FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(hasSubjectIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        val id = record["@id"] as String

        // The set of builders does not depend on the entity, so it is created once up front.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder
        )

        for ((key, entity) in input) {
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(id, entity)
                }
            }
            // Collected once per entity. Doing this inside the builder loop would add the same
            // entity once for every builder.
            if (digitalIdentifierReferences.contains(key)) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordLanguages = Filter.entitiesByProperty("hasLanguage", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val genre = Filter.entitiesByProperty("hasGenre", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }
        val format = CarrierType.extract(Filter.entitiesByProperty("hasCarrierType", physicalObject, input))
        if (format.isEmpty()) {
            log.error("Found no carrier types for record $id.")
        }

        // An absent main identifier (null result or NoSuchElementException) yields an empty locator.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        val digitalObjectValues = digitalMetadata(digitalObject)

        return SearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = record.getOrDefault("type", "Foto") as String,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            id = id,
            institution = Meta.extractInstitution(record),
            recordSet = Meta.extractRecordSet(record),
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            memoriavClaim = record["P60451"] != null,
            languageCaption = Extract.typedEntityByType(recordLanguages, "type", "caption", "name"),
            languageContent = Extract.typedEntityByType(recordLanguages, "type", "content", "name"),
            language = Extract.facetEntity(recordLanguages, "name"),
            genre = Extract.facetEntity(genre, "prefLabel"),
            keywords = Extract.facetEntity(subjects, "prefLabel"),

            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = locator,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

            digital = digitalObjectValues,

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            format = format,
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            // NOTE(review): indexing with [0] assumes build() never returns an empty container - confirm.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Returns the first entity in [input] whose `@type` is the rico `Instantiation` type and whose
     * `type` property equals [type], or `null` if there is none.
     */
    private fun findInstantiation(input: Map<String, JsonObject>, type: String): JsonObject? =
        input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == type }

    /**
     * Collects the technical metadata of the digital object. Properties that are not present
     * default to an empty string; without a digital object a default [EnrichedDigitalMetadata] is returned.
     */
    private fun digitalMetadata(digitalObject: JsonObject?): EnrichedDigitalMetadata {
        if (digitalObject == null) {
            return EnrichedDigitalMetadata()
        }
        val width = digitalObject.getOrDefault("width", "") as String
        val height = digitalObject.getOrDefault("height", "") as String
        return EnrichedDigitalMetadata(
            hasFormat = digitalObject.getOrDefault("hasFormat", "") as String,
            isDistributedOn = digitalObject.getOrDefault("isDistributedOn", "") as String,
            hasMimeType = digitalObject.getOrDefault("hasMimeType", "") as String,
            height = height,
            width = width,
            aspectRatio = AspectRatio.asFraction(width, height),
            mediaResourceDescription = digitalObject.getOrDefault("mediaResourceDescription", "") as String,
            orientation = digitalObject.getOrDefault("orientation", "") as String,
            hasColourContent = digitalObject.getOrDefault("P60558", "") as String,
            componentColor = Extract.listOfStrings(digitalObject["componentColor"])
        )
    }
}