RecordSetSearchDocBuilder.kt 11.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

Jonas Waeber's avatar
Jonas Waeber committed
21
import ch.memobase.rdf.DC
Jonas Waeber's avatar
Jonas Waeber committed
22
import ch.memobase.rdf.MB
23
import ch.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
24
import ch.memobase.rdf.RDA
25
import ch.memobase.rdf.RDF
Jonas Waeber's avatar
Jonas Waeber committed
26
import ch.memobase.rdf.RICO
27
import ch.memobase.rdf.RICO.Types.RecordSet
28
import com.beust.klaxon.JsonArray
29 30 31 32 33
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
Jonas Waeber's avatar
Jonas Waeber committed
34
import org.memobase.helpers.JSON
35 36
import org.memobase.helpers.KEYS
import org.memobase.model.FacetContainer
Jonas Waeber's avatar
Jonas Waeber committed
37
import org.memobase.model.IntegerRange
38 39 40 41 42 43
import org.memobase.model.LanguageContainer
import org.memobase.model.RecordSetSearchDoc
import org.memobase.model.Schema


/**
 * Builds the [RecordSetSearchDoc] for a record set from the entities contained in one message.
 *
 * Institution names, related record set names, document types and document counts are not part
 * of the message; they are looked up through the supplied [ElasticSearchWrapper].
 */
class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchWrapper) {
    private val log = LogManager.getLogger(this::class.java)

    /**
     * Transforms the entities of a single message into a record set search document.
     *
     * Entities that lack an expected string property (identifier value, holding relation source,
     * normalized date) are skipped with a warning instead of aborting the whole transformation.
     *
     * @param key key of the processed message; only used in log and error messages.
     * @param input all entities of the message, indexed by their identifier. Must contain the
     * record set entity under [JSON.recordSetTag].
     * @return the assembled [RecordSetSearchDoc].
     * @throws InvalidInputException if the message contains no record set entity or the record set
     * entity has no string entity id.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val recordSet =
            input[JSON.recordSetTag] ?: throw InvalidInputException("No record set entity found in message $key.")
        val relatedRecordSetIds =
            Extract.identifiers(recordSet[RICO.isRecordResourceAssociatedWithRecordResource.localName])
        val metadataLanguages = mutableListOf<JsonObject>()
        var originalTitles = LanguageContainer.EMPTY
        var projectTitles = LanguageContainer.EMPTY
        var relatedRecordSets = LanguageContainer.EMPTY
        var publicationTitles = LanguageContainer.EMPTY
        var relatedDocumentTitles = LanguageContainer.EMPTY
        val originalCallNumbers = mutableListOf<String>()
        val originalIdNumbers = mutableListOf<String>()
        val accessInstitutions = mutableListOf<FacetContainer>()
        val masterInstitutions = mutableListOf<FacetContainer>()
        val originalInstitutions = mutableListOf<FacetContainer>()

        // Sort every entity of the message into the bucket it belongs to. The branch order is
        // significant: the metadata language check does not look at the entity class at all.
        input.values.forEach { entity ->
            val entityClass = entity[KEYS.atType]
            val entityType = entity[RICO.type.localName]
            when {
                entityType == KEYS.LanguageType.metadata -> {
                    metadataLanguages.add(entity)
                }
                entityClass == RICO.Identifier.uri && entityType == KEYS.IdentifierType.callNumber -> {
                    stringProperty(entity, RICO.identifier.localName, key)?.let { originalCallNumbers.add(it) }
                }
                entityClass == RICO.Identifier.uri && entityType == KEYS.IdentifierType.original -> {
                    stringProperty(entity, RICO.identifier.localName, key)?.let { originalIdNumbers.add(it) }
                }
                entityClass == RICO.Title.uri && entityType == KEYS.TitleTypes.original -> {
                    originalTitles = originalTitles.add(entity[RICO.title.localName])
                }
                entityClass == RICO.CorporateBody.uri && entityType == KEYS.CorporateBodyType.memoriavProject -> {
                    projectTitles = projectTitles.add(entity[RICO.title.localName])
                }
                entityClass == RICO.RecordSet.uri && entityType == RecordSet.related -> {
                    relatedRecordSets = relatedRecordSets.add(entity[RICO.title.localName])
                }
                entityClass == RICO.Record.uri -> {
                    if (entityType == RICO.Types.Record.publication) {
                        publicationTitles = publicationTitles.add(entity[RICO.title.localName])
                    } else if (entityType == RICO.Types.Record.related) {
                        relatedDocumentTitles = relatedDocumentTitles.add(entity[RICO.title.localName])
                    }
                }
                entityClass == RICO.RecordResourceHoldingRelation.uri -> {
                    // All three relation types are resolved the same way; only the target list differs.
                    val target = when (entityType) {
                        RICO.Types.RecordResourceHoldingRelation.access -> accessInstitutions
                        RICO.Types.RecordResourceHoldingRelation.master -> masterInstitutions
                        RICO.Types.RecordResourceHoldingRelation.original -> originalInstitutions
                        else -> null
                    }
                    if (target != null) {
                        resolveHoldingInstitution(entity, key)?.let { target.add(it) }
                    }
                }
            }
        }

        // related record sets which are present in memobase.
        relatedRecordSetIds.forEach { relatedId ->
            if (relatedId.startsWith(NS.mbrs)) {
                val languageContainer = elasticSearchWrapper.getRecordSetName(relatedId.substringAfterLast("/"))
                relatedRecordSets = relatedRecordSets.merge(languageContainer)
            }
        }

        val name = extractLanguageContainer(recordSet[RICO.title.localName], "")

        // Date entities without a normalized string value are ignored rather than failing the cast.
        val normalizedDates = Extract.identifiers(recordSet[RICO.isAssociatedWithDate.localName])
            .mapNotNull { input[it] }
            .mapNotNull { it[RICO.normalizedDateValue.localName] as? String }
        // Only the first normalized date is used for the period of time.
        val date = normalizedDates.firstOrNull()?.let { parseYearRange(it, key) }

        val uri = recordSet[KEYS.entityId] as? String
            ?: throw InvalidInputException("Record set entity in message $key has no entity id.")
        val id = uri.substringAfterLast("/")
        val institutions = when (val heldBy = recordSet[RICO.heldBy.localName]) {
            is String -> listOf(heldBy)
            // Keep only string members; an unchecked cast would fail later on any other element type.
            is JsonArray<*> -> heldBy.filterIsInstance<String>()
            else -> {
                log.error("No institutions found for record set $id.")
                emptyList()
            }
        }
        val institutionIds = institutions.map { it.substringAfterLast("/") }

        val description = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
        val rights = extractLanguageContainer(recordSet[RICO.conditionsOfUse.localName], "")
        val access = extractLanguageContainer(recordSet[RICO.conditionsOfAccess.localName], "")
        val accessMemobase = extractLanguageContainer(recordSet[RDA.hasRestrictionOnAccess.localName], "")
        val history = extractLanguageContainer(recordSet[RICO.history.localName], "")
        val integrity = extractLanguageContainer(recordSet[RICO.integrity.localName], "")
        val extent = extractLanguageContainer(recordSet[RICO.recordResourceExtent.localName], "")
        val scopeAndContent = extractLanguageContainer(recordSet[RICO.scopeAndContent.localName], "")
        val conformsTo = extractLanguageContainer(recordSet[DC.conformsTo.localName], "")
        // NOTE(review): reads the same property as `description` above - confirm this is intended
        // and not a copy-paste slip.
        val dataImport = extractLanguageContainer(recordSet[RICO.descriptiveNote.localName], "")
        val teaserText = extractLanguageContainer(recordSet[MB.recordSetProcessedTeaserText.localName], "")

        val isPublished = when (val published = recordSet[KEYS.isPublished]) {
            is Boolean -> published
            is String -> published.toBoolean()
            else -> {
                log.error("Found no isPublished property on record set $key. Set to false.")
                false
            }
        }

        return RecordSetSearchDoc(
            recordSetId = id,
            isPublished = isPublished,
            periodOfTimeAsYear = date,
            institution = institutionIds.map { elasticSearchWrapper.getInstitutionName(it) },
            supportedByMemoriav = recordSet[KEYS.sponsoredBy] != null,
            name = name,
            documentType = elasticSearchWrapper.getDocumentTypesFromRecords(id, KEYS.QueryFields.recordSetFacet),
            keyVisualLink = recordSet[KEYS.wikidataImage] as? String ?: "NoKeyVisualLinkDefined",
            numberOfDocuments = elasticSearchWrapper.countNumberOfDocuments(id),
            lastUpdatedDate = Date.now,
            languageOfMetadata = metadataLanguages.map { language ->
                FacetContainer(
                    extractLanguageContainer(language[KEYS.name], "NoMetadataLanguageSet"),
                    // Only the last path segment of the sameAs URI is kept; null if it is not a string.
                    (language[KEYS.sameAs] as? String)?.substringAfterLast("/"),
                    emptyList()
                )
            },
            scopeAndContent = scopeAndContent,
            accessMemobase = accessMemobase,
            context = history,
            originalTitle = originalTitles,
            extent = extent,
            selection = integrity,
            indexing = conformsTo,
            rights = rights,
            description = description,
            access = access,
            project = projectTitles,
            relatedRecordSets = relatedRecordSets,
            relatedPublications = publicationTitles,
            relatedDocuments = relatedDocumentTitles,
            dataImport = dataImport,
            accessInstitution = accessInstitutions,
            originalInstitution = originalInstitutions,
            masterInstitution = masterInstitutions,
            teaserTest = teaserText,
            originalIdNUmber = originalIdNumbers,
            originalCallNumber = originalCallNumbers
        )
    }

    /**
     * Reads [property] from [entity] as a string.
     *
     * @return the string value, or null (after logging a warning) if the property is missing or
     * holds a non-string value.
     */
    private fun stringProperty(entity: JsonObject, property: String, key: String): String? {
        val value = entity[property] as? String
        if (value == null) {
            log.warn("Ignoring entity in message $key: property '$property' is missing or not a string.")
        }
        return value
    }

    /**
     * Resolves the institution a holding relation points to.
     *
     * The last path segment of the relation's source URI is passed to
     * [ElasticSearchWrapper.getInstitutionName].
     *
     * @return the looked-up institution, or null if the relation has no string source.
     */
    private fun resolveHoldingInstitution(relation: JsonObject, key: String): FacetContainer? =
        stringProperty(relation, RICO.recordResourceHoldingRelationHasSource.localName, key)
            ?.let { source -> elasticSearchWrapper.getInstitutionName(source.substringAfterLast("/")) }

    /**
     * Parses a normalized date of the form `from/to` or `year` into an [IntegerRange].
     *
     * A value that does not split into exactly two parts on `/` is treated as a single year,
     * using its first part for both bounds.
     *
     * @return the parsed range, or null (after logging a warning) if a bound is not an integer.
     */
    private fun parseYearRange(normalizedDate: String, key: String): IntegerRange? {
        val parts = normalizedDate.split("/")
        val from = parts[0].toIntOrNull()
        val to = if (parts.size == 2) parts[1].toIntOrNull() else from
        if (from == null || to == null) {
            log.warn("Could not parse normalized date '$normalizedDate' of record set in message $key as year range.")
            return null
        }
        return IntegerRange(from, to)
    }

    /**
     * Extracts the language containers from [value] and merges them into a single container.
     *
     * @return the merged container, or a container built from [placeholder] if nothing was extracted.
     */
    private fun extractLanguageContainer(value: Any?, placeholder: String): LanguageContainer {
        val items = Extract.languageContainer("record set", value)
        return if (items.isEmpty()) {
            LanguageContainer.placeholder(placeholder)
        } else {
            // reduce on a single element returns that element unchanged.
            items.reduce { merged, next -> merged.merge(next) }
        }
    }
}