SearchDocTransform.kt 11.8 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import org.memobase.builders.*
import org.memobase.helpers.*
Jonas Waeber's avatar
Jonas Waeber committed
25
import org.memobase.model.FacettedContainer
Jonas Waeber's avatar
Jonas Waeber committed
26
27
import org.memobase.model.LanguageContainer
import org.memobase.model.SearchDoc
28
import org.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
29

Jonas Waeber's avatar
Jonas Waeber committed
30
/**
 * Assembles a [SearchDoc] from the JSON entities belonging to a single record message.
 *
 * The transformation reads the entity stored under the key `"record"`, locates the digital and
 * physical instantiations among the remaining entities, lets a set of field builders collect
 * the entities they are interested in and finally combines everything into one [SearchDoc].
 */
class SearchDocTransform {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms the entities of one message into a [SearchDoc].
     *
     * @param input all entities of the message; the entry with key `"record"` is mandatory.
     * @return the search document built from the record, its instantiations and linked entities.
     * @throws IllegalStateException if there is no `"record"` entry, or if one of the record
     * properties `@id`, `type`, `heldBy` or `isPartOf` is missing or is not a string.
     */
    fun transform(input: Map<String, JsonObject>): SearchDoc {
        val record = input["record"] ?: error("No record defined in this message.")

        // Both instantiations share the same @type and differ only in their "type" property.
        val instantiationType = NS.rico + "Instantiation"
        fun findInstantiation(kind: String): JsonObject? =
            input.values.firstOrNull { it["@type"] == instantiationType && it["type"] == kind }

        val digitalObject = findInstantiation("digitalObject")
        val physicalObject = findInstantiation("physicalObject")

        val hasSubjectIds = Extract.identifiers(record[KEYS.hasSubject])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])
        val dateCreatedIds = Extract.identifiers(record[KEYS.created])
        val dateIssuedIds = Extract.identifiers(record[KEYS.issued])
        val temporalIds = Extract.identifiers(record[KEYS.temporal])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = PersonContainerBuilder(hasSubjectIds, null, input)
        val publisherPersonBuilder = PersonContainerBuilder(publishedByIds, null, input)
        val producersPersonBuilder = PersonContainerBuilder(producerIds, null, input)
        val contributorPersonBuilder = PersonContainerBuilder(emptyList(), KEYS.contributor, input)
        val creatorPersonBuilder = PersonContainerBuilder(emptyList(), KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)

        val placeFacetBuilder = PlaceFacetBuilder()

        val dateCreatedBuilder = DateContainerBuilder(dateCreatedIds)
        val dateIssuedBuilder = DateContainerBuilder(dateIssuedIds)
        val temporalBuilder = DateContainerBuilder(temporalIds)

        val suggestContainerBuilder = SuggestContainerBuilder(hasSubjectIds)

        // The builder list does not depend on the current item, so it is created once
        // instead of once per entity.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            dateCreatedBuilder,
            dateIssuedBuilder,
            temporalBuilder,
            suggestContainerBuilder
        )

        // Every builder is offered every entity and appends those its filter accepts.
        for (item in input.values) {
            for (builder in fieldBuilders) {
                if (builder.filter(item)) {
                    builder.append(item)
                }
            }
        }

        val id = requiredString(record, "@id")

        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordLanguages = Filter.entitiesByProperty("hasLanguage", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val genre = Filter.entitiesByProperty("hasGenre", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val publishers = Filter.entitiesByProperty("publishedBy", record, input)
        // NOTE(review): "P60441" is presumably the producer property - confirm against KEYS.producer.
        val producers = Filter.entitiesByProperty("P60441", record, input)
        val creationRelationAgents =
            Filter.entitiesByProperty("recordResourceOrInstantiationIsSourceOfCreationRelation", record, input)

        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }
        val format = CarrierType.extract(Filter.entitiesByProperty("hasCarrierType", physicalObject, input))
        if (format.isEmpty()) {
            // A missing carrier type is only reported; the document is still produced.
            log.error("Found no carrier types for record $id.")
        }

        return SearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = requiredString(record, "type"),
            // A missing source id is logged and replaced by a placeholder value.
            sourceID = try {
                Extract.extractSourceId(recordIdentifiers)
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            id = id,
            institution = listOf(
                FacettedContainer(
                    LanguageContainer(emptyList(), emptyList(), emptyList(), emptyList()),
                    listOf(requiredString(record, "heldBy"))
                )
            ),
            recordSet = FacettedContainer(
                LanguageContainer(emptyList(), emptyList(), emptyList(), emptyList()),
                listOf(requiredString(record, "isPartOf"))
            ),
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = temporalBuilder.build(),
            dateCreated = dateCreatedBuilder.build(),
            dateIssued = dateIssuedBuilder.build(),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            // The claim flag is true whenever the record carries property "P60451" at all.
            memoriavClaim = record["P60451"] != null,
            languageCaption = Extract.typedEntityByType(recordLanguages, "type", "caption", "name"),
            languageContent = Extract.typedEntityByType(recordLanguages, "type", "content", "name"),
            language = Extract.facetEntity(recordLanguages, "name"),
            genre = Extract.facetEntity(genre, "prefLabel"),
            keywords = Extract.facetEntity(subjects, "prefLabel"),

            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = Extract.typedEntityByType(subjects, "@type", "CorporateBody", "name"),
            corporateBodyProducer = Extract.typedEntityByType(producers, "@type", "CorporateBody", "name"),
            corporateBodyPublisher = Extract.typedEntityByType(publishers, "@type", "CorporateBody", "name"),
            corporateBodyContributor = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "contributor",
                agentTypeParam = NS.rico + "CorporateBody"
            ),

            corporateBodyCreator = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "creator",
                agentTypeParam = NS.rico + "CorporateBody"
            ),
            agentSubject = Extract.typedEntityByType(subjects, "@type", "Agent", "name"),
            agentProducer = Extract.typedEntityByType(producers, "@type", "Agent", "name"),
            agentPublisher = Extract.typedEntityByType(publishers, "@type", "Agent", "name"),
            agentContributor = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "contributor",
                agentTypeParam = NS.rico + "Agent"
            ),
            agentCreator = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "creator",
                agentTypeParam = NS.rico + "Agent"
            ),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = Extract.listOfStrings(digitalObject?.get("locator")),
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            format = format,
            // NOTE(review): takes the first element of the built suggest result - assumes
            // the builder always yields at least one entry; confirm in SuggestContainerBuilder.
            suggest = suggestContainerBuilder.build()[0]
        )
    }

    /**
     * Reads the mandatory string property [key] from [entity].
     *
     * Fails with a descriptive [IllegalStateException] instead of a bare cast failure when
     * the property is absent or holds a non-string value.
     */
    private fun requiredString(entity: JsonObject, key: String): String =
        entity[key] as? String
            ?: error("Expected a string value for '$key' but found '${entity[key]}' in entity '${entity["@id"]}'.")
}