SearchDocTransform.kt 11.9 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
23
import org.memobase.builders.FacettedContainerBuilder
24
25
26
27
import org.memobase.builders.IFieldBuilder
import org.memobase.builders.PersonContainerBuilder
import org.memobase.builders.PersonFacetBuilder
import org.memobase.builders.PlaceFacetBuilder
Jonas Waeber's avatar
Jonas Waeber committed
28
import org.memobase.builders.SuggestContainerBuilder
Jonas Waeber's avatar
Jonas Waeber committed
29
import org.memobase.helpers.CarrierType
Jonas Waeber's avatar
Jonas Waeber committed
30
import org.memobase.helpers.Extract
Jonas Waeber's avatar
Jonas Waeber committed
31
import org.memobase.helpers.FacetBuildHelpers
Jonas Waeber's avatar
Jonas Waeber committed
32
import org.memobase.helpers.Filter
33
import org.memobase.helpers.ReuseStatementMap
Jonas Waeber's avatar
Jonas Waeber committed
34
import org.memobase.model.FacettedContainer
Jonas Waeber's avatar
Jonas Waeber committed
35
36
import org.memobase.model.LanguageContainer
import org.memobase.model.SearchDoc
37
import org.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
38

Jonas Waeber's avatar
Jonas Waeber committed
39
/**
 * Transforms the JSON objects of a single record message into a [SearchDoc].
 *
 * The input map is expected to contain the record itself under the key `"record"`; the
 * remaining values are the entities (instantiations, agents, places, ...) referenced by it.
 */
class SearchDocTransform {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Builds the search document for the record contained in [input].
     *
     * @param input all JSON objects of one message; must contain an entry with the key `"record"`.
     * @return the assembled [SearchDoc].
     * @throws IllegalStateException if [input] has no `"record"` entry.
     *
     * NOTE(review): `@id`, `type`, `heldBy` and `isPartOf` are read from the record with an unchecked
     * `as String` cast, so a record where one of them is missing or is not a string makes this
     * function fail on that cast.
     */
    fun transform(input: Map<String, JsonObject>): SearchDoc {
        val record = input["record"] ?: error("No record defined in this message.")

        // The first instantiation of each kind found in the message is used; any further ones are ignored.
        val digitalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "digitalObject" }
        val physicalObject =
            input.values.firstOrNull { it["@type"] == NS.rico + "Instantiation" && it["type"] == "physicalObject" }

        // Identifiers of the entities the record links to, used to configure the builders below.
        val hasSubjectIds = Extract.identifiers(record[KEYS.hasSubject])
        val publishedByIds = Extract.identifiers(record[KEYS.publishedBy])
        val producerIds = Extract.identifiers(record[KEYS.producer])
        val spatialIds = Extract.identifiers(record[KEYS.spatial])
        val placeOfCaptureIds = Extract.identifiers(record[KEYS.placeOfCapture])

        val personFacetBuilder = PersonFacetBuilder()
        val subjectPersonBuilder = PersonContainerBuilder(hasSubjectIds, null, input)
        val publisherPersonBuilder = PersonContainerBuilder(publishedByIds, null, input)
        val producersPersonBuilder = PersonContainerBuilder(producerIds, null, input)
        val contributorPersonBuilder = PersonContainerBuilder(emptyList(), KEYS.contributor, input)
        val creatorPersonBuilder = PersonContainerBuilder(emptyList(), KEYS.creator, input)

        val placesRelatedBuilder = FacettedContainerBuilder(spatialIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)
        val placeCapturedBuilder =
            FacettedContainerBuilder(placeOfCaptureIds, KEYS.Place, KEYS.name, FacetBuildHelpers::place)

        val placeFacetBuilder = PlaceFacetBuilder()

        val suggestContainerBuilder = SuggestContainerBuilder(hasSubjectIds)

        // The set of builders never changes while iterating, so the list is created once
        // instead of once per input item.
        val fieldBuilders: List<IFieldBuilder> = listOf(
            personFacetBuilder,
            subjectPersonBuilder,
            publisherPersonBuilder,
            producersPersonBuilder,
            contributorPersonBuilder,
            creatorPersonBuilder,
            placeFacetBuilder,
            placeCapturedBuilder,
            placesRelatedBuilder,
            suggestContainerBuilder
        )

        // Offer every item to every builder; each builder decides via filter() whether it wants the item.
        for (item in input.values) {
            for (builder in fieldBuilders) {
                if (builder.filter(item)) {
                    builder.append(item)
                }
            }
        }

        val id = record["@id"] as String

        // Entities referenced from the record.
        val recordIdentifiers = Filter.entitiesByProperty("identifiedBy", record, input)
        val recordTitles = Filter.entitiesByProperty("hasTitle", record, input)
        val recordLanguages = Filter.entitiesByProperty("hasLanguage", record, input)
        val recordRules = Filter.entitiesByProperty("regulatedBy", record, input)

        val datesCreated = Filter.entitiesByProperty("created", record, input)
        val datesIssued = Filter.entitiesByProperty("issued", record, input)
        val temporal = Filter.entitiesByProperty("temporal", record, input)

        val genre = Filter.entitiesByProperty("hasGenre", record, input)
        val subjects = Filter.entitiesByProperty("hasSubject", record, input)

        val publishers = Filter.entitiesByProperty("publishedBy", record, input)
        // NOTE(review): "P60441" is presumably the producer property (cf. KEYS.producer above) - confirm.
        val producers = Filter.entitiesByProperty("P60441", record, input)
        val creationRelationAgents =
            Filter.entitiesByProperty("recordResourceOrInstantiationIsSourceOfCreationRelation", record, input)

        // Entities referenced from the digital instantiation (which may be absent).
        val digitalRules = Filter.entitiesByProperty("regulatedBy", digitalObject, input)

        // Entities referenced from the physical instantiation (which may be absent).
        val physicalRules = Filter.entitiesByProperty("regulatedBy", physicalObject, input)
        val physicalIdentifiers = Filter.entitiesByProperty("identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "type", "access", "name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "type", "access", "name").flatMap { it.toList() }

        val usageDigital = Extract.typedEntityByType(digitalRules, "type", "usage", "sameAs").flatMap { it.toList() }
        val format = CarrierType.extract(Filter.entitiesByProperty("hasCarrierType", physicalObject, input))
        if (format.isEmpty()) {
            // Only reported; the document is still produced with an empty format.
            log.error("Found no carrier types for record $id.")
        }

        return SearchDoc(
            title = Extract.typedEntityByType(recordTitles, "type", "main", "title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "type", "series", "title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "type", "broadcast", "title"),
            type = record["type"] as String,
            // A missing source id is logged and replaced by a placeholder instead of failing the record.
            sourceID = try {
                Extract.extractSourceId(recordIdentifiers)
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["sameAs"]),
            abstract = Extract.languageContainer("abstract", record["abstract"]),
            id = id,
            // Institution and record set carry only the raw reference from the record here;
            // their language containers are left empty.
            institution = listOf(
                FacettedContainer(
                    LanguageContainer(emptyList(), emptyList(), emptyList(), emptyList()),
                    listOf(record["heldBy"] as String)
                )
            ),
            recordSet = FacettedContainer(
                LanguageContainer(emptyList(), emptyList(), emptyList(), emptyList()),
                listOf(record["isPartOf"] as String)
            ),
            descriptiveNote = Extract.languageContainer("descriptiveNote", record["descriptiveNote"]),
            scopeAndContent = Extract.languageContainer("scopeAndContent", record["scopeAndContent"]),
            relatedMaterial = Extract.languageContainer("relation", record["relation"]),
            source = Extract.languageContainer("source", record["source"]),
            temporal = Extract.extractDate(temporal),
            dateCreated = Extract.extractDate(datesCreated),
            dateIssued = Extract.extractDate(datesIssued),
            placeCapture = placeCapturedBuilder.build(),
            placeRelated = placesRelatedBuilder.build(),
            placeFacet = placeFacetBuilder.build(),
            rightsHolder = Extract.typedEntityByType(recordRules, "type", "holder", "name"),
            // True as soon as the property is present, whatever its value.
            memoriavClaim = record["P60451"] != null,
            languageCaption = Extract.typedEntityByType(recordLanguages, "type", "caption", "name"),
            languageContent = Extract.typedEntityByType(recordLanguages, "type", "content", "name"),
            language = Extract.facetEntity(recordLanguages, "name"),
            genre = Extract.facetEntity(genre, "prefLabel"),
            keywords = Extract.facetEntity(subjects, "prefLabel"),

            personSubject = subjectPersonBuilder.build(),
            personProducer = producersPersonBuilder.build(),
            personPublisher = publisherPersonBuilder.build(),
            personContributor = contributorPersonBuilder.build(),
            personCreator = creatorPersonBuilder.build(),
            personsFacet = personFacetBuilder.build(),

            corporateBodySubject = Extract.typedEntityByType(subjects, "@type", "CorporateBody", "name"),
            corporateBodyProducer = Extract.typedEntityByType(producers, "@type", "CorporateBody", "name"),
            corporateBodyPublisher = Extract.typedEntityByType(publishers, "@type", "CorporateBody", "name"),
            corporateBodyContributor = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "contributor",
                agentTypeParam = NS.rico + "CorporateBody"
            ),

            corporateBodyCreator = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "creator",
                agentTypeParam = NS.rico + "CorporateBody"
            ),
            agentSubject = Extract.typedEntityByType(subjects, "@type", "Agent", "name"),
            agentProducer = Extract.typedEntityByType(producers, "@type", "Agent", "name"),
            agentPublisher = Extract.typedEntityByType(publishers, "@type", "Agent", "name"),
            agentContributor = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "contributor",
                agentTypeParam = NS.rico + "Agent"
            ),
            agentCreator = Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = "creator",
                agentTypeParam = NS.rico + "Agent"
            ),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("P60558")),
            digitalObjectNote = Extract.languageContainer("descriptiveNote", digitalObject?.get("descriptiveNote")),
            locator = Extract.listOfStrings(digitalObject?.get("locator")),
            usageConditionsDigital = Extract.languageContainer(
                "conditionsOfUse",
                digitalObject?.get("conditionsOfUse")
            ),
            usageDigital = usageDigital,
            // NOTE(review): presumably fails for a usage value ReuseStatementMap does not know - confirm.
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("duration")),
            colourPhysical = Extract.languageContainer("P60558", physicalObject?.get("P60558")),
            physicalCharacteristics = Extract.languageContainer(
                "physicalCharacteristics",
                physicalObject?.get("physicalCharacteristics")
            ),
            physicalObjectNote = Extract.languageContainer("descriptiveNote", physicalObject?.get("descriptiveNote")),
            usageConditionsPhysical = Extract.languageContainer(
                "conditionsOfUse",
                physicalObject?.get("conditionsOfUse")
            ),
            usagePhysical = Extract.typedEntityByType(physicalRules, "type", "usage", "sameAs").flatMap { it.toList() },
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "type", "callNumber", "identifier")
                .flatMap { it.toList() },
            format = format,
            // NOTE(review): takes the first element of the built result; assumes build() is never empty - confirm.
            suggest = suggestContainerBuilder.build()[0]
        )
    }
}