// SearchDocTransform.kt — committed by Jonas Waeber
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import org.memobase.builders.*
import org.memobase.helpers.*
Jonas Waeber's avatar
Jonas Waeber committed
25
import org.memobase.model.SearchDoc
26
import org.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
27

28
/**
 * Builds a [SearchDoc] for one record out of the entities delivered in a single message.
 *
 * @property mediaUrl prefix that is prepended to the main identifier of the digital object
 * to form the locator of the search document.
 */
class SearchDocTransform(private val mediaUrl: String) {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities in [input] into a [SearchDoc].
     *
     * The entry with the key "record" is mandatory. The digital and the physical instantiation are
     * looked up by their "@type" and "type" values; both may be missing, in which case the fields
     * derived from them are extracted from `null`.
     *
     * Note that the record's "@id" and "type" values are cast to [String] without a check, and the
     * first element of the record set and of the suggest container is accessed unconditionally.
     *
     * @param input all entities of one message. The keys are compared with the identifier references
     * of the digital object (presumably they are the entity ids — confirm with the producer).
     * @return the search document assembled from the record, its instantiations and related entities.
     * @throws IllegalStateException if [input] contains no "record" entry.
     */
    fun transform(input: Map<String, JsonObject>): SearchDoc {
        val record = input["record"] ?: error("No record defined in this message.")
        val digitalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }

        val hasSubjectIds = Extract.identifiers(record[KEYS.hasSubject])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = AgentContainerBuilder(hasSubjectIds, KEYS.Person, null, input)
        val publisherPersonBuilder = AgentContainerBuilder(publishedByIds, KEYS.Person, null, input)
        val producersPersonBuilder = AgentContainerBuilder(producerIds, KEYS.Person, null, input)
        val contributorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.contributor, input)
        val creatorPersonBuilder = AgentContainerBuilder(emptyList(), KEYS.Person, KEYS.creator, input)

        val subjectCorporateBodyBuilder = AgentContainerBuilder(hasSubjectIds, KEYS.CorporateBody, null, input)
        val publisherCorporateBodyBuilder = AgentContainerBuilder(publishedByIds, KEYS.CorporateBody, null, input)
        val producersCorporateBodyBuilder = AgentContainerBuilder(producerIds, KEYS.CorporateBody, null, input)
        val contributorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.contributor, input)
        val creatorCorporateBodyBuilder = AgentContainerBuilder(emptyList(), KEYS.CorporateBody, KEYS.creator, input)

        val subjectAgentBuilder = AgentContainerBuilder(hasSubjectIds, KEYS.Agent, null, input)
        val publisherAgentBuilder = AgentContainerBuilder(publishedByIds, KEYS.Agent, null, input)
        val producersAgentBuilder = AgentContainerBuilder(producerIds, KEYS.Agent, null, input)
        val contributorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.contributor, input)
        val creatorAgentBuilder = AgentContainerBuilder(emptyList(), KEYS.Agent, KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder = FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(hasSubjectIds)

        val digitalIdentifierReferences = Extract.identifiers(digitalObject?.get("identifiedBy"))
        val digitalIdentifierEntities = mutableListOf<JsonObject>()

        // The set of builders never changes, so the list is created once instead of once per entity.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            subjectCorporateBodyBuilder,
            publisherCorporateBodyBuilder,
            producersCorporateBodyBuilder,
            contributorCorporateBodyBuilder,
            creatorCorporateBodyBuilder,
            subjectAgentBuilder,
            publisherAgentBuilder,
            producersAgentBuilder,
            contributorAgentBuilder,
            creatorAgentBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder
        )

        for ((key, entity) in input) {
            // Every builder decides for itself whether the entity is relevant to it.
            for (builder in fieldBuilders) {
                if (builder.filter(entity)) {
                    builder.append(entity)
                }
            }

            // Collected once per entity. This check used to run inside the builder loop,
            // which added the same identifier entity once for every builder.
            if (key in digitalIdentifierReferences) {
                digitalIdentifierEntities.add(entity)
            }
        }

        val id = record["@id"] as String

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordLanguages = Filter.entitiesByProperty("hasLanguage", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val genre = Filter.entitiesByProperty("hasGenre", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }
        val format = CarrierType.extract(Filter.entitiesByProperty("hasCarrierType", physicalObject, input))
        if (format.isEmpty()) {
            log.error("Found no carrier types for record $id.")
        }

        // The locator stays empty when the digital object has no main identifier.
        val locator = try {
            Extract.extractIdValue(digitalIdentifierEntities, KEYS.IdentifierType.main)
                ?.let { "$mediaUrl$it" }
                ?: ""
        } catch (ex: NoSuchElementException) {
            ""
        }

        return SearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = record["type"] as String,
            sourceID = try {
                Extract.extractIdValue(recordIdentifiers, KEYS.IdentifierType.original) ?: "NoSourceIdFound"
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            id = id,
            institution = Meta.extractInstitution(record),
            recordSet = Meta.extractRecordSet(record)[0],
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            memoriavClaim = record["P60451"] != null,
            languageCaption = Extract.typedEntityByType(recordLanguages, "type", "caption", "name"),
            languageContent = Extract.typedEntityByType(recordLanguages, "type", "content", "name"),
            language = Extract.facetEntity(recordLanguages, "name"),
            genre = Extract.facetEntity(genre, "prefLabel"),
            keywords = Extract.facetEntity(subjects, "prefLabel"),

            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = subjectCorporateBodyBuilder.build(),
            corporateBodyProducer = producersCorporateBodyBuilder.build(),
            corporateBodyPublisher = publisherCorporateBodyBuilder.build(),
            corporateBodyContributor = contributorCorporateBodyBuilder.build(),
            corporateBodyCreator = creatorCorporateBodyBuilder.build(),

            agentSubject = subjectAgentBuilder.build(),
            agentProducer = producersAgentBuilder.build(),
            agentPublisher = publisherAgentBuilder.build(),
            agentContributor = contributorAgentBuilder.build(),
            agentCreator = creatorAgentBuilder.build(),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = locator,
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            format = format,
            published = (record[KEYS.isPublished] as Boolean?) ?: false,
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}