RecordSetSearchDocBuilder.kt 5.03 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
Jonas Waeber's avatar
Jonas Waeber committed
22
import java.lang.NumberFormatException
23
24
25
26
27
28
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Date
import org.memobase.helpers.ElasticSearchWrapper
import org.memobase.helpers.Extract
import org.memobase.helpers.KEYS
import org.memobase.model.FacetContainer
Jonas Waeber's avatar
Jonas Waeber committed
29
import org.memobase.model.IntegerRange
30
31
32
33
34
35
36
37
38
39
import org.memobase.model.LanguageContainer
import org.memobase.model.RecordSetSearchDoc
import org.memobase.model.Schema


/**
 * Builds the [RecordSetSearchDoc] for a record set from the entities contained in one message.
 *
 * Values that are not part of the message itself (institution name, document types and the
 * number of documents) are requested from the supplied [elasticSearchWrapper].
 */
class RecordSetSearchDocBuilder(private val elasticSearchWrapper: ElasticSearchWrapper) {
    private val log = LogManager.getLogger("RecordSetSearchDocBuilder")

    /**
     * Transforms the entities of a single message into a record set search document.
     *
     * @param key identifier of the message; only used in log and error messages.
     * @param input all entities of the message. The record set itself is expected under the
     * `"recordSet"` entry; date entities are looked up by the identifiers referenced from it.
     * @return the search document for the record set.
     * @throws InvalidInputException if the message has no `"recordSet"` entity, or if the
     * record set has no string value for its entity id or its holding institution.
     */
    fun transform(key: String, input: Map<String, JsonObject>): Schema {
        val recordSet =
            input["recordSet"] ?: throw InvalidInputException("No recordSet entity found in message $key.")
        val metadataLanguages = input.values.filter { it[KEYS.ricoType] == KEYS.LanguageType.metadata }

        val name = extractLanguageContainer(recordSet[KEYS.title], "NoNameFound")
        val description = extractLanguageContainer(recordSet[KEYS.descriptiveNote], "NoDescriptionFound")
        val date = extractPeriod(recordSet, input)

        // Only the last path segment of each URI is used as the identifier.
        val id = requireString(recordSet, KEYS.entityId, key).substringAfterLast("/")
        val institutionId = requireString(recordSet, KEYS.heldBy, key).substringAfterLast("/")

        return RecordSetSearchDoc(
            recordSetId = id,
            isPublished = extractIsPublished(recordSet, key),
            scopeAndContent = description,
            periodOfTimeAsYear = date,
            institution = elasticSearchWrapper.getInstitutionName(institutionId),
            supportedByMemoriav = recordSet[KEYS.sponsoredBy] != null,

            name = name,
            documentType = elasticSearchWrapper.getDocumentTypesFromRecords(id, KEYS.QueryFields.recordSetFacet),
            // A missing or non-string image link falls back to the placeholder instead of failing.
            keyVisualLink = recordSet[KEYS.wikidataImage] as? String ?: "NoKeyVisualLinkDefined",
            numberOfDocuments = elasticSearchWrapper.countNumberOfDocuments(id),
            lastUpdatedDate = Date.now,
            languageOfMetadata = metadataLanguages.map { language ->
                FacetContainer(
                    extractLanguageContainer(language[KEYS.name], "NoMetadataLanguageSet"),
                    (language[KEYS.sameAs] as? String)?.substringAfterLast("/"),
                    emptyList()
                )
            }
        )
    }

    /**
     * Returns the string stored under [property] in [entity].
     *
     * @throws InvalidInputException if the property is absent or is not a string.
     */
    private fun requireString(entity: JsonObject, property: String, key: String): String =
        entity[property] as? String
            ?: throw InvalidInputException("No string value for property $property on record set in message $key.")

    /**
     * Derives the period of the record set from the first associated date entity that carries
     * a string normalized date value.
     *
     * The value is split on `/`: exactly two parts are read as start and end, any other shape
     * uses the first part for both bounds. If there is no such date, or a part is not an
     * integer, the placeholder range is returned.
     */
    private fun extractPeriod(recordSet: JsonObject, input: Map<String, JsonObject>): IntegerRange {
        val normalizedDate = Extract.identifiers(recordSet[KEYS.isAssociatedWithDate])
            .mapNotNull { input[it] }
            // Date entities without a string normalized value are skipped rather than failing the cast.
            .mapNotNull { it[KEYS.normalizedDateValue] as? String }
            .firstOrNull()
            ?: return placeholderPeriod()

        val parts = normalizedDate.split("/")
        val from = parts[0].toIntOrNull()
        val to = if (parts.size == 2) parts[1].toIntOrNull() else from
        return if (from != null && to != null) IntegerRange(from, to) else placeholderPeriod()
    }

    /** Range used when no usable date is available for the record set. */
    private fun placeholderPeriod(): IntegerRange = IntegerRange(PLACEHOLDER_YEAR_FROM, PLACEHOLDER_YEAR_TO)

    /**
     * Reads the published flag, accepting either a boolean or a string value. Any other value
     * (including a missing property) is logged as an error and treated as `false`.
     */
    private fun extractIsPublished(recordSet: JsonObject, key: String): Boolean {
        val published = recordSet[KEYS.isPublished]
        return when (published) {
            is Boolean -> published
            is String -> published.toBoolean()
            else -> {
                log.error("Found no isPublished property on record set $key. Set to false.")
                false
            }
        }
    }

    /**
     * Extracts the language containers from [value] and merges them into a single container.
     * Returns a container built from [placeholder] when nothing could be extracted.
     */
    private fun extractLanguageContainer(value: Any?, placeholder: String): LanguageContainer {
        val items = Extract.languageContainer("record set", value)
        return if (items.isEmpty()) {
            LanguageContainer.placeholder(placeholder)
        } else {
            // reduce on a single element returns that element unchanged.
            items.reduce { acc, languageContainer -> acc.merge(languageContainer) }
        }
    }

    private companion object {
        const val PLACEHOLDER_YEAR_FROM = 3000
        const val PLACEHOLDER_YEAR_TO = 3001
    }
}