RecordSetSearchDocBuilder.kt 8.98 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

Jonas Waeber's avatar
Jonas Waeber committed
21
import ch.memobase.rdf.DC
22
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import ch.memobase.rdf.RDA
import ch.memobase.rdf.RICO
25
import ch.memobase.rdf.RICO.Types.RecordSet
26
27
28
29
30
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
Jonas Waeber's avatar
Jonas Waeber committed
31
import org.memobase.helpers.JSON
32
33
import org.memobase.helpers.KEYS
import org.memobase.model.FacetContainer
Jonas Waeber's avatar
Jonas Waeber committed
34
import org.memobase.model.IntegerRange
35
36
37
38
39
40
import org.memobase.model.LanguageContainer
import org.memobase.model.RecordSetSearchDoc
import org.memobase.model.Schema


/**
 * Builds the search document of a record set from the entities contained in one message.
 *
 * Values that are not part of the message itself (institution names, names of related record sets,
 * document types and the number of documents) are requested from the supplied [ElasticSearchWrapper].
 */
class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchWrapper) {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Transforms the entities of a single message into a [RecordSetSearchDoc].
     *
     * @param key key of the message; only used in log and error messages.
     * @param input entities of the message. The record set entity is read from [JSON.recordSetTag];
     * date entities are looked up by the identifiers referenced on the record set.
     * @return the assembled search document.
     * @throws InvalidInputException if [input] contains no record set entity.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val recordSet =
            input[JSON.recordSetTag] ?: throw InvalidInputException("No record set entity found in message $key.")
        val relatedRecordSetIds =
            Extract.identifiers(recordSet[RICO.isRecordResourceAssociatedWithRecordResource.localName])

        val metadataLanguages = mutableListOf<JsonObject>()
        var originalTitles = LanguageContainer.EMPTY
        var projectTitles = LanguageContainer.EMPTY
        var relatedRecordSets = LanguageContainer.EMPTY
        var publicationTitles = LanguageContainer.EMPTY
        var relatedDocumentTitles = LanguageContainer.EMPTY
        val accessInstitutions = mutableListOf<FacetContainer>()

        // Sort every entity of the message into the bucket it belongs to. The first matching branch wins.
        input.values.forEach { entity ->
            val atType = entity[KEYS.atType]
            val ricoType = entity[RICO.type.localName]
            when {
                ricoType == KEYS.LanguageType.metadata -> {
                    metadataLanguages.add(entity)
                }
                atType == RICO.Title.uri && ricoType == KEYS.TitleTypes.original -> {
                    originalTitles = originalTitles.add(entity[RICO.title.localName])
                }
                atType == RICO.CorporateBody.uri && ricoType == KEYS.CorporateBodyType.memoriavProject -> {
                    projectTitles = projectTitles.add(entity[RICO.title.localName])
                }
                atType == RICO.RecordSet.uri && ricoType == RecordSet.related -> {
                    relatedRecordSets = relatedRecordSets.add(entity[RICO.title.localName])
                }
                atType == RICO.Record.uri -> {
                    if (ricoType == RICO.Types.Record.publication) {
                        publicationTitles = publicationTitles.add(entity[RICO.title.localName])
                    } else if (ricoType == RICO.Types.Record.related) {
                        relatedDocumentTitles = relatedDocumentTitles.add(entity[RICO.title.localName])
                    }
                }
                // The type is compared by its URI, exactly like every other branch above. Comparing the
                // JSON value against the vocabulary constant itself would never match.
                atType == RICO.RecordResourceHoldingRelation.uri &&
                    ricoType == RICO.Types.RecordResourceHoldingRelation.access -> {
                    val source = entity[RICO.recordResourceHoldingRelationHasSource.localName] as? String
                    if (source == null) {
                        // Without a source there is no institution id to look up; skip instead of failing.
                        log.warn("Access holding relation without a source in record set $key. Ignored.")
                    } else {
                        val institutionNames = elasticSearchWrapper.getInstitutionName(source.substringAfterLast("/"))
                        accessInstitutions.add(institutionNames)
                    }
                }
            }
        }

        // related record sets which are present in memobase.
        relatedRecordSetIds.forEach { relatedId ->
            if (relatedId.startsWith(NS.mbrs)) {
                val languageContainer = elasticSearchWrapper.getRecordSetName(relatedId.substringAfterLast("/"))
                relatedRecordSets = relatedRecordSets.merge(languageContainer)
            }
        }

        val name = extractLanguageContainer(recordSet[RICO.title.localName], "")

        // Only the first associated date that carries a normalised value is used. Date entities that
        // are missing from the message or have no normalised string value are skipped.
        val date = Extract.identifiers(recordSet[RICO.isAssociatedWithDate.localName])
            .mapNotNull { dateId -> input[dateId] }
            .mapNotNull { dateEntity -> dateEntity[RICO.normalizedDateValue.localName] as? String }
            .firstOrNull()
            ?.let { normalizedDate -> parseYearRange(normalizedDate) }

        val uri = recordSet[KEYS.entityId] as String
        val id = uri.substringAfterLast("/")
        val institution = recordSet[RICO.heldBy.localName] as String
        val institutionId = institution.substringAfterLast("/")

        val description = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
        val rights = extractLanguageContainer(recordSet[RICO.conditionsOfUse.localName], "")
        val access = extractLanguageContainer(recordSet[RICO.conditionsOfAccess.localName], "")
        val accessMemobase = extractLanguageContainer(recordSet[RDA.hasRestrictionOnAccess.localName], "")
        val history = extractLanguageContainer(recordSet[RICO.history.localName], "")
        val integrity = extractLanguageContainer(recordSet[RICO.integrity.localName], "")
        val extent = extractLanguageContainer(recordSet[RICO.recordResourceExtent.localName], "")
        val scopeAndContent = extractLanguageContainer(recordSet[RICO.scopeAndContent.localName], "")
        val conformsTo = extractLanguageContainer(recordSet[DC.conformsTo.localName], "")
        // NOTE(review): this reads the same property as `description` above, which looks like a
        // copy-paste slip. Confirm which property is supposed to feed `dataImport`.
        val dataImport = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")

        return RecordSetSearchDoc(
            recordSetId = id,
            isPublished = recordSet[KEYS.isPublished].let { published ->
                when (published) {
                    is Boolean -> published
                    is String -> published.toBoolean()
                    else -> {
                        log.error("Found no isPublished property on record set $key. Set to false.")
                        false
                    }
                }
            },
            periodOfTimeAsYear = date,
            institution = elasticSearchWrapper.getInstitutionName(institutionId),
            supportedByMemoriav = recordSet[KEYS.sponsoredBy] != null,
            name = name,
            documentType = elasticSearchWrapper.getDocumentTypesFromRecords(id, KEYS.QueryFields.recordSetFacet),
            keyVisualLink = recordSet[KEYS.wikidataImage]?.let { it as String } ?: "NoKeyVisualLinkDefined",
            numberOfDocuments = elasticSearchWrapper.countNumberOfDocuments(id),
            lastUpdatedDate = Date.now,
            languageOfMetadata = metadataLanguages.map { language ->
                FacetContainer(
                    extractLanguageContainer(language[KEYS.name], "NoMetadataLanguageSet"),
                    // Last path segment of the sameAs URI, or null when sameAs is not a string.
                    (language[KEYS.sameAs] as? String)?.substringAfterLast("/"),
                    emptyList()
                )
            },
            scopeAndContent = scopeAndContent,
            accessMemobase = accessMemobase,
            context = history,
            originalTitle = originalTitles,
            extent = extent,
            selection = integrity,
            indexing = conformsTo,
            rights = rights,
            description = description,
            access = access,
            project = projectTitles,
            relatedRecordSets = relatedRecordSets,
            relatedPublications = publicationTitles,
            relatedDocuments = relatedDocumentTitles,
            dataImport = dataImport,
            accessInstitution = accessInstitutions
        )
    }

    /**
     * Parses a normalised date value into a range of years.
     *
     * A value with exactly one `/` is read as `from/to`. Any other value uses the part before the
     * first `/` (or the whole value if there is none) as both start and end.
     *
     * @return the parsed range, or `null` if a required part is not an integer.
     */
    private fun parseYearRange(normalizedDate: String): IntegerRange? {
        val parts = normalizedDate.split("/")
        return try {
            val from = parts[0].toInt()
            val to = if (parts.size == 2) parts[1].toInt() else from
            IntegerRange(from, to)
        } catch (ex: NumberFormatException) {
            null
        }
    }

    /**
     * Extracts the language containers found in [value] and merges them into a single container.
     *
     * @param value raw property value taken from an entity; may be `null`.
     * @param placeholder text handed to [LanguageContainer.placeholder] when nothing could be extracted.
     */
    private fun extractLanguageContainer(value: Any?, placeholder: String): LanguageContainer {
        val containers = Extract.languageContainer("record set", value)
        return if (containers.isEmpty()) {
            LanguageContainer.placeholder(placeholder)
        } else {
            // reduce returns the sole element unchanged when there is only one container.
            containers.reduce { merged, next -> merged.merge(next) }
        }
    }
}