In order to mitigate against the brute force attacks against Gitlab accounts, we are moving to all edu-ID Logins. We would like to remind you to link your account with your edu-id. Login will be possible only by edu-ID after November 30, 2021. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one with this guide here

kind regards

DocumentsSearchDocBuilder.kt 14.8 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

21
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
22
23
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import org.memobase.builders.AgentContainerBuilder
import org.memobase.builders.DateContainerBuilder
import org.memobase.builders.EnrichedFacetContainerBuilder
import org.memobase.builders.FacettedContainerBuilder
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
import org.memobase.builders.SuggestContainerBuilder
import org.memobase.helpers.AspectRatio
import org.memobase.helpers.DocumentTypeMapper
import org.memobase.helpers.Extract
import org.memobase.helpers.FacetBuildHelpers
import org.memobase.helpers.Filter
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractInstitution
import org.memobase.helpers.InstitutionAndRecordSetExtractionHelper.extractRecordSet
import org.memobase.helpers.KEYS
import org.memobase.helpers.ReuseStatementMap
41
import org.memobase.model.DocumentsSearchDoc
Jonas Waeber's avatar
Jonas Waeber committed
42
import org.memobase.model.EnrichedDigitalMetadata
43
import org.memobase.model.LanguageContainer
44
import org.memobase.model.Schema
Jonas Waeber's avatar
Jonas Waeber committed
45

46
47
48
class DocumentsSearchDocBuilder(
        private val documentTypeMapper: DocumentTypeMapper,
        private val mediaUrl: String) {
Jonas Waeber's avatar
Jonas Waeber committed
49
    private val log = LogManager.getLogger("SearchDocTransform")
50

51
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
52

Jonas Waeber's avatar
Jonas Waeber committed
53
        val record = input["record"] ?: throw InvalidInputException("No record defined in the message.")
54
55
56
57
58
        val digitalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }

59
60
        val keywordIds = Extract.identifiers(record[KEYS.hasSubject])
        val genreIds = Extract.identifiers(record[KEYS.hasGenre])
Jonas Waeber's avatar
Jonas Waeber committed
61
62
63
64
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
Jonas Waeber's avatar
Jonas Waeber committed
65
66
67
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])
Jonas Waeber's avatar
Jonas Waeber committed
68

69
        val personFacetBuilder = PersonFacetBuilder()
70
        val subjectPersonBuilder = AgentContainerBuilder(keywordIds, KEYS.Person, null, input)
71
72
73
74
75
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

76
        val subjectCorporateBodyBuilder = AgentContainerBuilder(keywordIds, KEYS.CorporateBody, null, input)
77
78
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
79
80
        val contributorCorporateBodyBuilder =
            AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
81
82
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

83
        val subjectAgentBuilder = AgentContainerBuilder(keywordIds, KEYS.Agent, null, input)
84
85
86
87
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)
88

Jonas Waeber's avatar
Jonas Waeber committed
89
        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
90
91
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
Jonas Waeber's avatar
Jonas Waeber committed
92

93
94
        val placeFacetBuilder = PlaceFacetBuilder()

Jonas Waeber's avatar
Jonas Waeber committed
95
96
97
98
        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

99
        val suggestContainerBuilder = SuggestContainerBuilder(keywordIds)
Jonas Waeber's avatar
Jonas Waeber committed
100

101
102
103
        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

104
105
106
107
        val formats = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.CarrierType, KEYS.name)
        val languages = EnrichedFacetContainerBuilder(emptyList(), NS.rico + KEYS.Language, KEYS.name)
        val genres = EnrichedFacetContainerBuilder(genreIds, NS.skos + KEYS.Concept, KEYS.prefLabel)

108
        val id = key
109

110
        for (item in input.entries) {
111
112
113
114
115
116
117
            for (builder: IFieldBuilder in listOf(
                personFacetBuilder,
                subjectPersonBuilder,
                publisherPersonBuilder,
                producersPersonBuilder,
                contributorPersonBuilder,
                creatorPersonBuilder,
118
119
120
121
122
123
124
125
126
127
128
                subjectCorporateBodyBuilder,
                publisherCorporateBodyBuilder,
                producersCorporateBodyBuilder,
                contributorCorporateBodyBuilder,
                creatorCorporateBodyBuilder,
                subjectAgentBuilder,
                publisherAgentBuilder,
                producersAgentBuilder,
                contributorAgentBuilder,
                creatorAgentBuilder,

Jonas Waeber's avatar
Jonas Waeber committed
129
130
                placeFacetBuilder,
                placeCapturedBuilder,
Jonas Waeber's avatar
Jonas Waeber committed
131
                placesRelatedBuilder,
Jonas Waeber's avatar
Jonas Waeber committed
132
133
134
                dateCreatedBuilder,
                dateIssuedBuilder,
                temporalBuilder,
135
136
137
138
                suggestContainerBuilder,
                formats,
                genres,
                languages
139
            )) {
140
                if (builder.filter(item.value)) {
141
                    builder.append(id, item.value)
142
                }
143
144
145

                if (digitalIdentifierReferences.contains(item.key))
                    digitalIdentifierEntities.add(item.value)
146
147
            }
        }
148

Jonas Waeber's avatar
Jonas Waeber committed
149
150
151
        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)
Jonas Waeber's avatar
Jonas Waeber committed
152

Jonas Waeber's avatar
Jonas Waeber committed
153
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)
Jonas Waeber's avatar
Jonas Waeber committed
154

Jonas Waeber's avatar
Jonas Waeber committed
155
        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)
156

Jonas Waeber's avatar
Jonas Waeber committed
157
158
        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)
159

160
161
        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }
162

163
        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }
164

165
166
167
168
169
170
171
172
173
174
        val locator = try {
            val value = Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
            if (value == null)
                ""
            else
                "${mediaUrl}${value}"
        } catch (ex: NoSuchElementException) {
            ""
        }

Jonas Waeber's avatar
Jonas Waeber committed
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
        val digitalObjectValues = digitalObject.let {
            if (it != null) {
                val width = it.getOrDefault("width", "") as String
                val height = it.getOrDefault("height", "") as String
                EnrichedDigitalMetadata(
                    hasFormat = it.getOrDefault("hasFormat", "") as String,
                    isDistributedOn = it.getOrDefault("isDistributedOn", "") as String,
                    hasMimeType = it.getOrDefault("hasMimeType", "") as String,
                    height = height,
                    width = width,
                    aspectRatio = AspectRatio.asFraction(width, height),
                    mediaResourceDescription = it.getOrDefault("mediaResourceDescription", "") as String,
                    orientation = it.getOrDefault("orientation", "") as String,
                    hasColourContent = it.getOrDefault("P60558", "") as String,
                    componentColor = Extract.listOfStrings(digitalObject?.get("componentColor"))

                )
            } else {
                EnrichedDigitalMetadata()
            }
        }

197
198
199
200
201
202
203
204
        val type = record[KEYS.ricoType].let {
            if (it == null) {
                documentTypeMapper.getValue("Andere")
            } else {
                documentTypeMapper.getValue(it as String)
            }
        }

205
        return DocumentsSearchDoc(
206
207
208
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
209
            type = type,
Jonas Waeber's avatar
Jonas Waeber committed
210
            sourceID = try {
211
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
Jonas Waeber's avatar
Jonas Waeber committed
212
213
214
215
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
216
            sameAs = Extract.listOfStrings(record["sameAs"]),
Jonas Waeber's avatar
Jonas Waeber committed
217
            abstract = Extract.languageContainer("abstract", record["abstract"]),
218
            recordId = id,
Jonas Waeber's avatar
Jonas Waeber committed
219
220
            institution = extractInstitution(record),
            recordSet = extractRecordSet(record),
Jonas Waeber's avatar
Jonas Waeber committed
221
222
223
224
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
Jonas Waeber's avatar
Jonas Waeber committed
225
226
227
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
Jonas Waeber's avatar
Jonas Waeber committed
228
229
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
230
            placeFacet = placeFacetBuilder.build(),
231
232
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            memoriavClaim = record["P60451"] != null,
Jonas Waeber's avatar
Jonas Waeber committed
233

234
235
236
            format = formats.build(),
            language = languages.build(),
            genre = genres.build(),
237

238
239
240
241
242
243
            keywords = subjects.flatMap { Extract.languageContainer("hasSubject", it[KEYS.prefLabel]) }.let {
                if (it.isEmpty())
                    LanguageContainer.EMPTY
                else
                    it.reduce { acc, languageContainer -> acc.merge(languageContainer)
            }},
244
245
246
247
248
249
250
            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

251
252
253
254
255
256
257
258
259
260
261
            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),
262
263
264
265
266
267

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
268
269
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
Jonas Waeber's avatar
Jonas Waeber committed
270
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
271
            locator = locator,
272
273
274
275
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
276
277
278
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

Jonas Waeber's avatar
Jonas Waeber committed
279
280
            digital = digitalObjectValues,

281
282
            // PHYSICAL
            accessPhysical = accessPhysical,
283
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
Jonas Waeber's avatar
Jonas Waeber committed
284
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
285
286
287
288
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
Jonas Waeber's avatar
Jonas Waeber committed
289
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
290
291
292
293
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
294
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
295
296
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
Jonas Waeber's avatar
Jonas Waeber committed
297
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
Jonas Waeber's avatar
Jonas Waeber committed
298
            suggest = suggestContainerBuilder.build()[0]
Jonas Waeber's avatar
Jonas Waeber committed
299
300
301
        )
    }
}