Extract.kt 6.75 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase.helpers

21
import com.beust.klaxon.JsonArray
Jonas Waeber's avatar
Jonas Waeber committed
22
23
24
import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.model.DateContainer
Jonas Waeber's avatar
Jonas Waeber committed
25
import org.memobase.model.FacettedContainer
Jonas Waeber's avatar
Jonas Waeber committed
26
import org.memobase.model.LanguageContainer
Jonas Waeber's avatar
Jonas Waeber committed
27
import org.memobase.model.RelationNameContainer
28
import org.memobase.rdf.NS
Jonas Waeber's avatar
Jonas Waeber committed
29
30

object Extract {
Jonas Waeber's avatar
Jonas Waeber committed
31
    // Logger used by the helpers in this object to report missing or unsupported values;
    // it is registered under the name "ExtractSearchDoc".
    private val log = LogManager.getLogger("ExtractSearchDoc")
Jonas Waeber's avatar
Jonas Waeber committed
32

Jonas Waeber's avatar
Jonas Waeber committed
33
    /**
     * Returns the `@value` of [entity] as a single-element list when the entity's
     * `@language` equals [language] and a value is present; otherwise an empty list.
     *
     * The value is cast to [String], so a non-string `@value` with a matching
     * language fails with a cast exception.
     */
    private fun languageTag(entity: JsonObject, language: String): List<String> {
        val literal = entity["@value"]
        if (literal == null || entity["@language"] != language) {
            return emptyList()
        }
        return listOf(literal as String)
    }

Jonas Waeber's avatar
Jonas Waeber committed
42
    /**
     * Builds language containers from a raw property value.
     *
     * Supported shapes of [entity]:
     * - a plain [String]: passed as the fourth constructor argument of [LanguageContainer]
     *   (presumably the `un` slot, i.e. no language information),
     * - a [JsonObject]: its `@value` is placed in the `de`, `fr` or `it` slot that matches
     *   its `@language`,
     * - a [List] of the two shapes above: each item is converted and all results are merged
     *   into a single container.
     *
     * @param parent label of the owning entity; only used in log messages.
     * @param entity the raw property value, may be `null`.
     * @return a list holding at most one container; empty when [entity] is `null`, has an
     * unsupported type, or is a list that yields no convertible item.
     */
    fun languageContainer(parent: String, entity: Any?): List<LanguageContainer> {
        return when (entity) {
            is String, is JsonObject -> listOfNotNull(singleLanguageContainer(entity))
            is List<*> -> {
                val containers = entity.mapNotNull { subEntity ->
                    singleLanguageContainer(subEntity) ?: run {
                        log.error("Could not extract language container from subEntity $subEntity in entity $entity.")
                        null
                    }
                }
                // reduce() throws on an empty collection, which happens for `[]` and for lists
                // that contain only unsupported items, so guard before merging.
                if (containers.isEmpty()) {
                    emptyList()
                } else {
                    listOf(containers.reduce { acc, languageContainer -> acc.merge(languageContainer) })
                }
            }
            null -> {
                log.info("No items in $parent found.")
                emptyList()
            }
            else -> {
                log.error("Could not extract language container from entity: $entity in parent $parent.")
                emptyList()
            }
        }
    }

    /**
     * Converts a single [String] or [JsonObject] item into a [LanguageContainer].
     * Returns `null` for any other type (including `null`) so the caller decides how to report it.
     */
    private fun singleLanguageContainer(item: Any?): LanguageContainer? {
        return when (item) {
            is String -> LanguageContainer(emptyList(), emptyList(), emptyList(), listOf(item))
            is JsonObject ->
                LanguageContainer(
                    de = languageTag(item, "de"),
                    fr = languageTag(item, "fr"),
                    it = languageTag(item, "it"),
                    un = emptyList()
                )
            else -> null
        }
    }

    /**
     * Collects the language containers found under [annotationField] for every entity
     * in [entities] whose [field] value equals [type].
     *
     * Each matching entity's `@id` is cast to [String] and handed to [languageContainer]
     * as the parent label, so a matching entity without a string `@id` fails with a
     * cast exception.
     *
     * @return the containers of all matching entities, in input order.
     */
    fun typedEntityByType(
        entities: List<JsonObject>,
        field: String,
        type: String,
        annotationField: String
    ): List<LanguageContainer> {
        val collected = mutableListOf<LanguageContainer>()
        for (candidate in entities) {
            if (candidate[field] != type) continue
            collected.addAll(languageContainer(candidate["@id"] as String, candidate[annotationField]))
        }
        return collected
    }

Jonas Waeber's avatar
Jonas Waeber committed
96
    /**
     * Creates one [FacettedContainer] per entity in [entities] that has a usable value
     * under [property].
     *
     * The value is converted with [languageContainer]; entities for which that yields
     * nothing are left out. The container is built from the first language container
     * and the result of its `toList()`.
     */
    fun facetEntity(entities: List<JsonObject>, property: String): List<FacettedContainer> {
        return entities.mapNotNull { entity ->
            languageContainer(entity["@id"] as String, entity[property])
                .firstOrNull()
                ?.let { container -> FacettedContainer(container, container.toList()) }
        }
    }

    /**
     * Returns the `identifier` value of the first entity in [entities] whose `type`
     * equals `"original"`.
     *
     * @throws NoSuchElementException if no entity has the type `"original"`.
     * @throws IllegalStateException if the matching entity has no `identifier` value.
     */
    fun extractSourceId(entities: List<JsonObject>): String {
        // Compare without casting: an entity lacking a string "type" is skipped instead of
        // aborting the whole search with a cast failure.
        val original = entities.firstOrNull { it["type"] == "original" }
            ?: throw NoSuchElementException("No identifier of type 'original' found: $entities.")
        return original["identifier"] as String?
            ?: error("Identifier contains no property rico:identifier: $entities.")
    }

    /**
     * Resolves the agents referenced by creation relations of a given type.
     *
     * For every relation in [creationRelations] whose `type` equals
     * [creationRelationTypeParam], the first entity returned by
     * `Filter.entitiesByProperty("creationRelationHasTarget", ...)` is taken as the agent.
     * The agent is kept only when its `@type` equals [agentTypeParam] and it has a name.
     *
     * Relations that cannot be resolved (no target entity, agent without a name) are
     * logged and skipped so that one malformed relation does not abort the whole extraction.
     *
     * @param creationRelations candidate creation relation entities.
     * @param input lookup map passed through to `Filter.entitiesByProperty`.
     * @param creationRelationTypeParam required value of the relation's `type`.
     * @param agentTypeParam required value of the agent's `@type`.
     * @return one [RelationNameContainer] per resolved agent, holding the merged agent name
     * and the merged relation name (or a container with `"Unknown"` when the relation has no name).
     */
    fun extractTypedCreationRelationAgent(
        creationRelations: List<JsonObject>,
        input: Map<String, JsonObject>,
        creationRelationTypeParam: String,
        agentTypeParam: String
    ): List<RelationNameContainer> {
        return creationRelations.mapNotNull { creationRelation ->
            // Plain equality instead of a cast: a missing or non-string type simply does not match.
            if (creationRelation["type"] != creationRelationTypeParam) return@mapNotNull null

            val name = languageContainer("creation-relation-name", creationRelation["name"])
            if (name.isEmpty()) {
                log.warn("No name for creation relation found: $creationRelation.")
            }

            val agent = Filter.entitiesByProperty("creationRelationHasTarget", creationRelation, input).firstOrNull()
            if (agent == null) {
                log.error("No agent found for creation relation: $creationRelation.")
                return@mapNotNull null
            }
            if (agent["@type"] != agentTypeParam) return@mapNotNull null

            // The id is only used as a label in log messages, so a missing id must not fail here.
            val agentName = languageContainer(agent["@id"].toString(), agent["name"])
            if (agentName.isEmpty()) {
                log.error("Agent/Person/CorporateBody without rico:name found: ${agent["@id"]}.")
                return@mapNotNull null
            }

            val mergedAgentName = agentName.reduce { acc, languageContainer -> acc.merge(languageContainer) }
            val mergedRelationName =
                if (name.isNotEmpty()) name.reduce { acc, languageContainer -> acc.merge(languageContainer) }
                else LanguageContainer(emptyList(), emptyList(), emptyList(), listOf("Unknown"))
            RelationNameContainer(mergedAgentName, mergedRelationName)
        }
    }

    /**
     * Normalizes a value that is either a single string or a list of strings.
     *
     * @param value a [String], a [List], or anything else (including `null`).
     * @return a one-element list for a [String]; the string elements of a [List] whose first
     * element is a [String]; an empty list in every other case, including an empty list.
     */
    fun listOfStrings(value: Any?): List<String> {
        return when (value) {
            is String -> listOf(value)
            is List<*> ->
                // firstOrNull() avoids the index error on an empty list; filterIsInstance replaces
                // the unchecked cast so the result can never contain non-String elements.
                if (value.firstOrNull() is String) value.filterIsInstance<String>() else emptyList()
            else -> emptyList()
        }
    }
154

Jonas Waeber's avatar
Jonas Waeber committed
155
    /**
     * Extracts identifier strings from a raw property value.
     *
     * @param value a [String] (used as is), a [JsonObject] (its `@id` is used), or a
     * [JsonArray] whose items are strings or objects with an `@id`.
     * @return the identifiers found, in input order; items without a string `@id` and
     * unsupported types contribute nothing.
     */
    fun identifiers(value: Any?): List<String> {
        return when (value) {
            is String -> listOf(value)
            is JsonObject -> listOfNotNull(value["@id"] as? String)
            is JsonArray<*> ->
                value.mapNotNull { item ->
                    when (item) {
                        is String -> item
                        // Read "@id" from the item itself. Looking the key up on the enclosing
                        // array (Klaxon's JsonArray.get(String)) yields an array, never a String,
                        // so object items were previously always dropped.
                        is JsonObject -> item["@id"] as? String
                        else -> null
                    }
                }
            else -> emptyList()
        }
    }
Jonas Waeber's avatar
Jonas Waeber committed
174
}