RecordSetSearchDocBuilder.kt 11.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

Jonas Waeber's avatar
Jonas Waeber committed
21
import ch.memobase.rdf.DC
Jonas Waeber's avatar
Jonas Waeber committed
22
import ch.memobase.rdf.MB
23
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
24
25
import ch.memobase.rdf.RDA
import ch.memobase.rdf.RICO
26
import ch.memobase.rdf.RICO.Types.RecordSet
27
import com.beust.klaxon.JsonArray
28
29
30
31
32
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
Jonas Waeber's avatar
Jonas Waeber committed
33
import org.memobase.helpers.JsonUtility
Jonas Waeber's avatar
Jonas Waeber committed
34
import org.memobase.helpers.Constants
35
import org.memobase.model.FacetContainer
Jonas Waeber's avatar
Jonas Waeber committed
36
import org.memobase.model.IntegerRange
37
38
39
40
41
42
import org.memobase.model.LanguageContainer
import org.memobase.model.RecordSetSearchDoc
import org.memobase.model.Schema


/**
 * Builds the search document of a record set from the JSON entities contained in one message.
 *
 * Besides the values found in the message itself, the builder asks the supplied
 * [ElasticSearchWrapper] for institution names, names of related record sets, document types
 * and the number of documents of the record set.
 */
class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchWrapper) {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Transforms the entities of one message into a [RecordSetSearchDoc].
     *
     * Entities whose expected string property is missing or not a string (identifier value,
     * holding relation source, normalized date value) are skipped instead of aborting the
     * whole transformation.
     *
     * @param key message key; only used in log and error messages.
     * @param input all entities of the message. The record set itself is expected under
     * [JsonUtility.recordSetTag]; date entities are looked up by the identifiers referenced
     * from the record set.
     * @return the search document for the record set.
     * @throws InvalidInputException if [input] contains no record set entity.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val recordSet = input[JsonUtility.recordSetTag]
            ?: throw InvalidInputException("No record set entity found in message $key.")
        val relatedRecordSetIds =
            Extract.identifiers(recordSet[RICO.isRecordResourceAssociatedWithRecordResource.localName])

        val metadataLanguages = mutableListOf<JsonObject>()
        var originalTitles = LanguageContainer.EMPTY
        var projectTitles = LanguageContainer.EMPTY
        var relatedRecordSets = LanguageContainer.EMPTY
        var publicationTitles = LanguageContainer.EMPTY
        var relatedDocumentTitles = LanguageContainer.EMPTY
        val originalCallNumbers = mutableListOf<String>()
        val originalIdNumbers = mutableListOf<String>()
        val accessInstitutions = mutableListOf<FacetContainer>()
        val masterInstitutions = mutableListOf<FacetContainer>()
        val originalInstitutions = mutableListOf<FacetContainer>()

        // Sort every entity of the message into the bucket it belongs to.
        // The first matching branch wins, so the order of the branches matters.
        for (entity in input.values) {
            val atType = entity[Constants.atType]
            val ricoType = entity[RICO.type.localName]
            when {
                ricoType == Constants.LanguageType.metadata -> {
                    metadataLanguages.add(entity)
                }
                atType == RICO.Identifier.uri && ricoType == Constants.IdentifierType.callNumber -> {
                    identifierValue(entity)?.let { originalCallNumbers.add(it) }
                }
                atType == RICO.Identifier.uri && ricoType == Constants.IdentifierType.original -> {
                    identifierValue(entity)?.let { originalIdNumbers.add(it) }
                }
                atType == RICO.Title.uri && ricoType == Constants.TitleTypes.original -> {
                    originalTitles = originalTitles.add(entity[RICO.title.localName])
                }
                atType == RICO.CorporateBody.uri && ricoType == Constants.CorporateBodyType.memoriavProject -> {
                    projectTitles = projectTitles.add(entity[RICO.title.localName])
                }
                atType == RICO.RecordSet.uri && ricoType == RecordSet.related -> {
                    relatedRecordSets = relatedRecordSets.add(entity[RICO.title.localName])
                }
                atType == RICO.Record.uri -> {
                    if (ricoType == RICO.Types.Record.publication) {
                        publicationTitles = publicationTitles.add(entity[RICO.title.localName])
                    } else if (ricoType == RICO.Types.Record.related) {
                        relatedDocumentTitles = relatedDocumentTitles.add(entity[RICO.title.localName])
                    }
                }
                atType == RICO.RecordResourceHoldingRelation.uri &&
                    ricoType == RICO.Types.RecordResourceHoldingRelation.access -> {
                    holdingInstitution(entity)?.let { accessInstitutions.add(it) }
                }
                atType == RICO.RecordResourceHoldingRelation.uri &&
                    ricoType == RICO.Types.RecordResourceHoldingRelation.master -> {
                    holdingInstitution(entity)?.let { masterInstitutions.add(it) }
                }
                atType == RICO.RecordResourceHoldingRelation.uri &&
                    ricoType == RICO.Types.RecordResourceHoldingRelation.original -> {
                    holdingInstitution(entity)?.let { originalInstitutions.add(it) }
                }
            }
        }

        // related record sets which are present in memobase.
        relatedRecordSetIds
            .filter { it.startsWith(NS.mbrs) }
            .forEach { relatedId ->
                val languageContainer = elasticSearchWrapper.getRecordSetName(relatedId.substringAfterLast("/"))
                relatedRecordSets = relatedRecordSets.merge(languageContainer)
            }

        val name = extractLanguageContainer(recordSet[RICO.title.localName], "")

        // Only the first date with a normalized value is considered. Date entities that are not
        // part of the message or carry no normalized string value are ignored.
        val date = Extract.identifiers(recordSet[RICO.isAssociatedWithDate.localName])
            .mapNotNull { dateId -> input[dateId] }
            .mapNotNull { dateEntity -> dateEntity[RICO.normalizedDateValue.localName] as? String }
            .firstOrNull()
            ?.let { parseYearRange(it) }

        val uri = recordSet[Constants.entityId] as String
        val id = uri.substringAfterLast("/")

        val institutions = when (val value = recordSet[RICO.heldBy.localName]) {
            is String -> listOf(value)
            // Keep only string entries; anything else cannot be an institution URI.
            is JsonArray<*> -> value.filterIsInstance<String>()
            else -> {
                log.error("No institutions found for record set $id.")
                emptyList()
            }
        }
        val institutionIds = institutions.map { it.substringAfterLast("/") }

        val description = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
        val rights = extractLanguageContainer(recordSet[RICO.conditionsOfUse.localName], "")
        val access = extractLanguageContainer(recordSet[RICO.conditionsOfAccess.localName], "")
        val accessMemobase = extractLanguageContainer(recordSet[RDA.hasRestrictionOnAccess.localName], "")
        val history = extractLanguageContainer(recordSet[RICO.history.localName], "")
        val integrity = extractLanguageContainer(recordSet[RICO.integrity.localName], "")
        val extent = extractLanguageContainer(recordSet[RICO.recordResourceExtent.localName], "")
        val scopeAndContent = extractLanguageContainer(recordSet[RICO.scopeAndContent.localName], "")
        val conformsTo = extractLanguageContainer(recordSet[DC.conformsTo.localName], "")
        // NOTE(review): reads the same property as `description` (RICO.descriptiveNote) - confirm
        // that this is intended and not a copy-paste slip.
        val dataImport = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
        val teaserText = extractLanguageContainer(recordSet[MB.recordSetProcessedTeaserText.localName], "")

        return RecordSetSearchDoc(
            recordSetId = id,
            published = when (val flag = recordSet[Constants.isPublished]) {
                is Boolean -> flag
                is String -> flag.toBoolean()
                else -> {
                    log.error("Found no isPublished property on record set $key. Set to false.")
                    false
                }
            },
            periodOfTimeAsYear = date,
            institution = institutionIds.map { elasticSearchWrapper.getInstitutionName(it) },
            supportedByMemoriav = recordSet[Constants.sponsoredBy] != null,
            name = name,
            documentType = elasticSearchWrapper.getDocumentTypesFromRecords(id, Constants.QueryFields.recordSetFacet),
            keyVisualLink = recordSet[Constants.wikidataImage] as? String ?: "NoKeyVisualLinkDefined",
            numberOfDocuments = elasticSearchWrapper.countNumberOfDocuments(id),
            lastUpdatedDate = Date.now,
            languageOfMetadata = metadataLanguages.map { language ->
                FacetContainer(
                    extractLanguageContainer(language[Constants.name], "NoMetadataLanguageSet"),
                    // Only the last path segment of the sameAs URI is kept, if there is one.
                    (language[Constants.sameAs] as? String)?.substringAfterLast("/"),
                    emptyList()
                )
            },
            scopeAndContent = scopeAndContent,
            accessMemobase = accessMemobase,
            context = history,
            originalTitle = originalTitles,
            extent = extent,
            selection = integrity,
            indexing = conformsTo,
            rights = rights,
            description = description,
            access = access,
            project = projectTitles,
            relatedRecordSets = relatedRecordSets,
            relatedPublications = publicationTitles,
            relatedDocuments = relatedDocumentTitles,
            dataImport = dataImport,
            accessInstitution = accessInstitutions,
            originalInstitution = originalInstitutions,
            masterInstitution = masterInstitutions,
            // The following two parameter names are kept exactly as the original call spells them.
            teaserTest = teaserText,
            originalIdNUmber = originalIdNumbers,
            originalCallNumber = originalCallNumbers
        )
    }

    /** Returns the identifier string of an identifier entity, or null (with a warning) if it has none. */
    private fun identifierValue(entity: JsonObject): String? {
        val value = entity[RICO.identifier.localName] as? String
        if (value == null) {
            log.warn("Skipped an identifier entity without a string identifier value.")
        }
        return value
    }

    /**
     * Looks up the institution referenced as source of a holding relation entity.
     * Returns null (with a warning) if the entity has no string source.
     */
    private fun holdingInstitution(entity: JsonObject): FacetContainer? {
        val source = entity[RICO.recordResourceHoldingRelationHasSource.localName] as? String
        if (source == null) {
            log.warn("Skipped a holding relation entity without a string source.")
            return null
        }
        return elasticSearchWrapper.getInstitutionName(source.substringAfterLast("/"))
    }

    /**
     * Parses a normalized date of the form `from/to` or a single value into an [IntegerRange].
     * A single value (or any number of parts other than two) yields a range from the first part
     * to itself. Returns null if a required part is not an integer.
     */
    private fun parseYearRange(normalizedDate: String): IntegerRange? {
        val parts = normalizedDate.split("/")
        val from = parts[0].toIntOrNull() ?: return null
        val to = when (parts.size) {
            2 -> parts[1].toIntOrNull() ?: return null
            else -> from
        }
        return IntegerRange(from, to)
    }

    /**
     * Extracts the language containers found in [value] and merges them into one.
     * If nothing is found, a container built from [placeholder] is returned instead.
     */
    private fun extractLanguageContainer(value: Any?, placeholder: String): LanguageContainer {
        val containers = Extract.languageContainer("record set", value)
        return if (containers.isEmpty()) {
            LanguageContainer.placeholder(placeholder)
        } else {
            // reduce on a single-element list returns that element unchanged.
            containers.reduce { merged, next -> merged.merge(next) }
        }
    }
}