Extract.kt 8.08 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase.helpers

21
import com.beust.klaxon.JsonArray
Jonas Waeber's avatar
Jonas Waeber committed
22
23
24
25
26
27
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.model.CreatorNameContainer
import org.memobase.model.DateContainer
import org.memobase.model.LanguageContainer
import org.memobase.model.NameContainer
28
import org.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
29
30

object Extract {
Jonas Waeber's avatar
Jonas Waeber committed
31
    // Log4j logger used by the helpers in this object; registered under the name "ExtractSearchDoc".
    private val log = LogManager.getLogger("ExtractSearchDoc")
Jonas Waeber's avatar
Jonas Waeber committed
32

Jonas Waeber's avatar
Jonas Waeber committed
33
    /**
     * Reads the `@value` of a language-tagged literal when its `@language` equals [language].
     *
     * @param entity JSON object that is looked up for the keys `@language` and `@value`.
     * @param language language code compared against the `@language` entry.
     * @return a single-element list holding the value, or an empty list when the language
     * does not match or `@value` is absent/null. A non-null `@value` that is not a `String`
     * makes the cast fail.
     */
    private fun languageTag(entity: JsonObject, language: String): List<String> {
        val literal = entity["@value"]
        if (entity["@language"] != language || literal == null) {
            return emptyList()
        }
        return listOf(literal as String)
    }

Jonas Waeber's avatar
Jonas Waeber committed
42
    /**
     * Builds language containers from a raw JSON property value.
     *
     * Supported shapes of [entity]:
     * - a plain `String`: wrapped without a language tag (passed as the fourth constructor
     *   argument of [LanguageContainer], presumably the `un` slot - confirm against the model);
     * - a [JsonObject]: its `@value` is placed in the `de`, `fr` or `it` slot that matches
     *   its `@language`;
     * - a `List` of the above: every usable element is converted and the results are merged
     *   into one container. Unusable elements are logged and skipped.
     *
     * @param parent identifier of the owning entity; only used in log messages.
     * @param entity raw property value, may be `null`.
     * @return a list with at most one container. Empty when [entity] is `null`, has an
     * unsupported type, or is a list without any usable element.
     */
    fun languageContainer(parent: String, entity: Any?): List<LanguageContainer> {
        return when (entity) {
            is String -> listOf(untaggedContainer(entity))
            is JsonObject -> listOf(taggedContainer(entity))
            is List<*> -> {
                val containers = entity.mapNotNull { subEntity ->
                    when (subEntity) {
                        is String -> untaggedContainer(subEntity)
                        is JsonObject -> taggedContainer(subEntity)
                        else -> {
                            log.error("Could not extract language container from subEntity $subEntity in entity $entity.")
                            null
                        }
                    }
                }
                // reduce() throws on an empty collection, so an empty array (or one with only
                // unusable elements) must be handled before merging.
                if (containers.isEmpty()) {
                    emptyList()
                } else {
                    listOf(containers.reduce { acc, languageContainer -> acc.merge(languageContainer) })
                }
            }
            null -> {
                log.info("No items in $parent found.")
                emptyList()
            }
            else -> {
                log.error("Could not extract language container from entity: $entity in parent $parent.")
                emptyList()
            }
        }
    }

    /** Wraps a plain string that carries no language tag. */
    private fun untaggedContainer(value: String): LanguageContainer =
        LanguageContainer(emptyList(), emptyList(), emptyList(), listOf(value))

    /** Sorts the `@value` of a language-tagged literal into the slot matching its `@language`. */
    private fun taggedContainer(entity: JsonObject): LanguageContainer =
        LanguageContainer(
            de = languageTag(entity, "de"),
            fr = languageTag(entity, "fr"),
            it = languageTag(entity, "it"),
            un = emptyList()
        )

    /**
     * Converts date entities into [DateContainer]s.
     *
     * The date string is read from `normalizedDateValue` when that key is present and from
     * `expressedDate` otherwise. Facets are only built for normalized dates whose `@type`
     * is the rico `SingleDate` or `DateRange`; every other combination gets an empty facet list.
     *
     * @param entities date entities; each must provide a string `@type` and the selected
     * date key, otherwise the respective cast fails.
     * @return one container per input entity, in input order.
     */
    fun extractDate(entities: List<JsonObject>): List<DateContainer> {
        return entities.map { entity ->
            val isNormalized = entity.containsKey("normalizedDateValue")
            val dateKey = if (isNormalized) "normalizedDateValue" else "expressedDate"
            val date = entity[dateKey] as String
            val qualifier = entity["dateQualifier"] as String?
            val certainty = entity["certainty"] as String?
            val type = entity["@type"] as String
            val facetList = when {
                // Only normalized values are handed to the facet builder.
                !isNormalized -> emptyList()
                type == NS.rico + "SingleDate" -> DateFacetBuilder.buildFromNormalizedSingleDate(date)
                type == NS.rico + "DateRange" -> DateFacetBuilder.buildFromNormalizedDateRange(date)
                else -> emptyList()
            }
            DateContainer(
                date = date,
                qualifier = qualifier,
                certainty = certainty,
                facet = facetList
            )
        }
    }

    /**
     * Collects the language containers of all entities whose [field] equals [type].
     *
     * @param entities candidate entities.
     * @param field key whose value is compared against [type].
     * @param type value an entity must carry in [field] to be selected.
     * @param annotationField key of the property that is turned into language containers.
     * @return the containers of all selected entities, concatenated in input order.
     */
    fun typedEntityByType(
        entities: List<JsonObject>,
        field: String,
        type: String,
        annotationField: String
    ): List<LanguageContainer> {
        val matching = entities.filter { candidate -> candidate[field] == type }
        return matching.flatMap { match ->
            // The entity's @id is passed along as the parent label used in log messages.
            languageContainer(match["@id"] as String, match[annotationField])
        }
    }

    /**
     * Builds a [NameContainer] for every entity that yields a language container for [property].
     *
     * @param entities entities to inspect; each must have a string `@id`.
     * @param property key of the property holding the name value.
     * @return one [NameContainer] per entity with a usable value, built from the first
     * language container and its `toList()` result; entities without one are skipped.
     */
    fun facetEntity(entities: List<JsonObject>, property: String): List<NameContainer> {
        return entities.mapNotNull { entity ->
            languageContainer(entity["@id"] as String, entity[property])
                .firstOrNull()
                ?.let { container -> NameContainer(container, container.toList()) }
        }
    }

    /**
     * Collects the language containers built from the `name` property of each place entity.
     *
     * @param entities place entities; each must have a string `@id`.
     * @return the containers of all entities, concatenated in input order.
     */
    fun extractPlaces(entities: List<JsonObject>): List<LanguageContainer> =
        entities.flatMap { place -> languageContainer(place["@id"] as String, place["name"]) }

    /**
     * Returns the `identifier` value of the first identifier entity whose `type` is `"original"`.
     *
     * The type is compared without a cast, so entities that lack a `type` entry (or carry a
     * non-string one) are skipped instead of aborting the search.
     *
     * @param entities identifier entities to search.
     * @return the identifier string of the first entity typed `"original"`.
     * @throws NoSuchElementException when no entity has the type `"original"`.
     * @throws IllegalStateException when the matching entity has no `identifier` value.
     */
    fun extractSourceId(entities: List<JsonObject>): String {
        val original = entities.firstOrNull { it["type"] == "original" }
            ?: throw NoSuchElementException("No identifier of type 'original' found: $entities.")
        return original["identifier"] as String?
            ?: error("Identifier contains no property rico:identifier: $entities.")
    }

    /**
     * Builds a [CreatorNameContainer] for each creation relation of the requested type whose
     * target agent has the requested type.
     *
     * For every relation whose `type` equals [creationRelationTypeParam] the first entity
     * returned by `Filter.entitiesByProperty("creationRelationHasTarget", ...)` is taken as
     * the agent. Relations are skipped (with a log entry where noted) when
     * - no target agent can be resolved (logged as error),
     * - the agent's `@type` differs from [agentTypeParam],
     * - the agent has no usable `name` (logged as error).
     *
     * A relation without a name is logged as a warning and gets a placeholder container
     * holding `"Unknown"`.
     *
     * @param creationRelations relations to inspect; each must have a string `type`.
     * @param input lookup table handed to `Filter.entitiesByProperty` to resolve the target.
     * @param creationRelationTypeParam relation type to keep.
     * @param agentTypeParam agent `@type` to keep.
     * @return one container per relation that passed all checks, in input order.
     */
    fun extractTypedCreationRelationAgent(
        creationRelations: List<JsonObject>,
        input: Map<String, JsonObject>,
        creationRelationTypeParam: String,
        agentTypeParam: String
    ): List<CreatorNameContainer> {
        return creationRelations.mapNotNull { creationRelation ->
            val creationRelationType = creationRelation["type"] as String
            if (creationRelationTypeParam != creationRelationType) return@mapNotNull null

            val name = languageContainer("creation-relation-name", creationRelation["name"])
            if (name.isEmpty()) {
                log.warn("No name for creation relation found: $creationRelation.")
            }

            // A relation whose target cannot be resolved must not abort the whole extraction.
            val agent = Filter.entitiesByProperty("creationRelationHasTarget", creationRelation, input)
                .firstOrNull()
            if (agent == null) {
                log.error("No target agent found for creation relation: $creationRelation.")
                return@mapNotNull null
            }

            val agentType = agent["@type"] as String
            if (agentType != agentTypeParam) return@mapNotNull null

            val agentName = languageContainer(agent["@id"] as String, agent["name"])
            if (agentName.isEmpty()) {
                log.error("Agent/Person/CorporateBody without rico:name found: ${agent["@id"]}.")
                return@mapNotNull null
            }

            CreatorNameContainer(
                agentName.reduce { acc, languageContainer -> acc.merge(languageContainer) },
                if (name.isNotEmpty()) name.reduce { acc, languageContainer -> acc.merge(languageContainer) }
                else LanguageContainer(emptyList(), emptyList(), emptyList(), listOf("Unknown"))
            )
        }
    }

    /**
     * Normalizes a value that may be a single string or a list of strings.
     *
     * The first element of a list decides whether it is treated as a string list; any
     * non-string elements that follow are dropped, so the result never contains values
     * of another type.
     *
     * @param value a `String`, a `List`, or anything else (including `null`).
     * @return a single-element list for a `String`; the string elements of a `List` whose
     * first element is a `String`; otherwise an empty list (also for an empty list).
     */
    fun listOfStrings(value: Any?): List<String> {
        return when (value) {
            is String -> listOf(value)
            is List<*> ->
                // firstOrNull() keeps an empty list from throwing; filterIsInstance replaces
                // the unchecked cast of the whole list.
                if (value.firstOrNull() is String)
                    value.filterIsInstance<String>()
                else
                    emptyList()
            else ->
                emptyList()
        }
    }
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

    /**
     * Extracts identifier strings from a raw JSON value.
     *
     * @param value a `String` (used as is), a [JsonObject] (its `@id` is used when it is a
     * string), a [JsonArray] of strings and/or objects, or anything else.
     * @return the collected identifiers in input order; array elements that are neither a
     * string nor an object with a string `@id` are skipped. Empty for unsupported values.
     */
    fun identifier(value: Any?): List<String> {
        return when (value) {
            is String -> listOf(value)
            is JsonObject -> listOfNotNull(value["@id"] as? String)
            is JsonArray<*> ->
                value.mapNotNull { item ->
                    when (item) {
                        is String -> item
                        // Read the id from the element itself, not from the enclosing array.
                        is JsonObject -> item["@id"] as? String
                        else -> null
                    }
                }
            else -> emptyList()
        }
    }
Jonas Waeber's avatar
Jonas Waeber committed
210
}