/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

package org.memobase

import com.beust.klaxon.JsonObject
import org.apache.logging.log4j.LogManager
import org.memobase.helpers.Extract
import org.memobase.helpers.ReuseStatementMap
import org.memobase.model.LanguageContainer
import org.memobase.model.NameContainer
import org.memobase.model.SearchDoc

/**
 * Assembles a [SearchDoc] from the entities contained in a single record message.
 */
class SearchDocTransform {
    private val log = LogManager.getLogger("SearchDocTransform")

    /**
     * Transforms one message into a [SearchDoc].
     *
     * The record is read from the `"record"` entry of [input]. The digital and physical
     * instantiations are the first values whose `@type` is `rico:Instantiation` and whose
     * `rico:type` is `digitalObject` / `physicalObject`; either may be absent.
     *
     * @param input all entities of the message; must contain a `"record"` entry.
     * @return the search document built from the record and its related entities.
     * @throws IllegalStateException if there is no `"record"` entry, or the record has no
     * string value for `@id` or `rico:type`.
     */
    fun transform(input: Map<String, JsonObject>): SearchDoc {
        val record = input["record"] ?: error("No record defined in this message.")

        // Both instantiations are optional, so the lookup yields null when none matches.
        fun instantiationOfType(instantiationType: String): JsonObject? = input.values.firstOrNull {
            it["@type"] == "rico:Instantiation" && it["rico:type"] == instantiationType
        }
        val digitalObject = instantiationOfType("digitalObject")
        val physicalObject = instantiationOfType("physicalObject")

        // Fail fast with a descriptive message instead of an opaque cast failure.
        val id = record["@id"] as? String ?: error("Record has no string value for '@id'.")
        val recordType = record["rico:type"] as? String
            ?: error("Record $id has no string value for 'rico:type'.")

        val recordIdentifiers = Extract.getEntitiesFromIds("rico:identifiedBy", record, input)
        val recordTitles = Extract.getEntitiesFromIds("rico:hasTitle", record, input)
        val recordLanguages = Extract.getEntitiesFromIds("rico:hasLanguage", record, input)
        val recordRules = Extract.getEntitiesFromIds("rico:regulatedBy", record, input)

        val datesCreated = Extract.getEntitiesFromIds("dct:created", record, input)
        val datesIssued = Extract.getEntitiesFromIds("dct:issued", record, input)
        val temporal = Extract.getEntitiesFromIds("dct:temporal", record, input)
        val placesRelated = Extract.getEntitiesFromIds("dct:spatial", record, input)
        val placeCaptured = Extract.getEntitiesFromIds("rdau:P60556", record, input)

        val genre = Extract.getEntitiesFromIds("ebucore:hasGenre", record, input)
        val subjects = Extract.getEntitiesFromIds("rico:hasSubject", record, input)

        val publishers = Extract.getEntitiesFromIds("rico:publishedBy", record, input)
        val producers = Extract.getEntitiesFromIds("rdau:P60441", record, input)
        val creationRelationAgents =
            Extract.getEntitiesFromIds("rico:recordResourceOrInstantiationIsSourceOfCreationRelation", record, input)

        // Shared call shape for the creator/contributor fields below; only the two type filters vary.
        fun relationAgents(relationType: String, agentType: String) =
            Extract.extractTypedCreationRelationAgent(
                creationRelationAgents,
                input,
                creationRelationTypeParam = relationType,
                agentTypeParam = agentType
            )

        val digitalRules = Extract.getEntitiesFromIds("rico:regulatedBy", digitalObject, input)

        val physicalRules = Extract.getEntitiesFromIds("rico:regulatedBy", physicalObject, input)
        val physicalIdentifiers = Extract.getEntitiesFromIds("rico:identifiedBy", physicalObject, input)

        val accessPhysical = Extract.typedEntityByType(physicalRules, "rico:type", "access", "rico:name").flatMap { it.toList() }
        val accessDigital = Extract.typedEntityByType(digitalRules, "rico:type", "access", "rico:name").flatMap { it.toList() }

        val usageDigital = Extract.typedEntityByType(digitalRules, "rico:type", "usage", "schema:sameAs").flatMap { it.toList() }
        val usagePhysical = Extract.typedEntityByType(physicalRules, "rico:type", "usage", "schema:sameAs").flatMap { it.toList() }

        val format = Extract.carrierType(Extract.getEntitiesFromIds("rico:hasCarrierType", physicalObject, input))
        if (format.isEmpty()) {
            // Missing carrier types are only reported; the document is still produced.
            log.error("Found no carrier types for record $id.")
        }

        return SearchDoc(
            title = Extract.typedEntityByType(recordTitles, "rico:type", "main", "rico:title"),
            seriesTitle = Extract.typedEntityByType(recordTitles, "rico:type", "series", "rico:title"),
            broadcastTitle = Extract.typedEntityByType(recordTitles, "rico:type", "broadcast", "rico:title"),
            type = recordType,
            // A missing source id is logged and replaced by a marker value rather than failing the record.
            sourceID = try {
                Extract.extractSourceId(recordIdentifiers)
            } catch (ex: NoSuchElementException) {
                log.error("No source id found for record $id")
                "NoSourceIdFound"
            },
            sameAs = Extract.listOfStrings(record["schema:sameAs"]),
            abstract = Extract.extractLanguageContainer("dct:abstract", record["dct:abstract"]),
            id = id,
            // NOTE(review): institution and recordSet are hard-coded constants here and do not
            // depend on the input - presumably placeholders; confirm before relying on them.
            institution = listOf(
                NameContainer(
                    LanguageContainer(listOf("Institution"), listOf("institution"), listOf("istituzione"), emptyList()),
                    listOf("https://memobase.ch/institution/MEMORIAV")
                )
            ),
            recordSet = NameContainer(
                LanguageContainer(listOf("Bestand"), listOf("collection"), listOf("fondo"), emptyList()),
                listOf("https://memobase.ch/recordSet/EXAMPLE")
            ),
            descriptiveNote = Extract.extractLanguageContainer("rico:descriptiveNote", record["rico:descriptiveNote"]),
            scopeAndContent = Extract.extractLanguageContainer("rico:scopeAndContent", record["rico:scopeAndContent"]),
            relatedMaterial = Extract.extractLanguageContainer("dct:relation", record["dct:relation"]),
            source = Extract.extractLanguageContainer("rico:source", record["rico:source"]),
            temporal = Extract.extractDate(temporal),
            dateCreated = Extract.extractDate(datesCreated),
            dateIssued = Extract.extractDate(datesIssued),
            placeCapture = Extract.extractPlaces(placeCaptured),
            placeRelated = Extract.extractPlaces(placesRelated),
            place = Extract.facetEntity(placeCaptured + placesRelated, "rico:name"),
            rightsHolder = Extract.typedEntityByType(recordRules, "rico:type", "holder", "rico:name"),
            // True whenever the record carries any value for rdau:P60451.
            memoriavClaim = record["rdau:P60451"] != null,
            languageCaption = Extract.typedEntityByType(recordLanguages, "rico:type", "caption", "rico:name"),
            languageContent = Extract.typedEntityByType(recordLanguages, "rico:type", "content", "rico:name"),
            language = Extract.facetEntity(recordLanguages, "rico:name"),
            genre = Extract.facetEntity(genre, "skos:prefLabel"),
            keywords = Extract.facetEntity(subjects, "skos:prefLabel"),
            agentSubject = Extract.typedEntityByType(subjects, "@type", "rico:Agent", "rico:name"),
            personSubject = Extract.typedEntityByType(subjects, "@type", "rico:Person", "rico:name"),
            corporateBodySubject = Extract.typedEntityByType(subjects, "@type", "rico:CorporateBody", "rico:name"),
            agentProducer = Extract.typedEntityByType(producers, "@type", "rico:Agent", "rico:name"),
            personProducer = Extract.typedEntityByType(producers, "@type", "rico:Person", "rico:name"),
            corporateBodyProducer = Extract.typedEntityByType(producers, "@type", "rico:CorporateBody", "rico:name"),
            agentPublisher = Extract.typedEntityByType(publishers, "@type", "rico:Agent", "rico:name"),
            personPublisher = Extract.typedEntityByType(publishers, "@type", "rico:Person", "rico:name"),
            corporateBodyPublisher = Extract.typedEntityByType(publishers, "@type", "rico:CorporateBody", "rico:name"),
            agentContributor = relationAgents("contributor", "rico:Agent"),
            personContributor = relationAgents("contributor", "rico:Person"),
            corporateBodyContributor = relationAgents("contributor", "rico:CorporateBody"),
            agentCreator = relationAgents("creator", "rico:Agent"),
            personCreator = relationAgents("creator", "rico:Person"),
            corporateBodyCreator = relationAgents("creator", "rico:CorporateBody"),
            // TODO: Implement collected agents!
            persons = emptyList(),
            corporateBodies = emptyList(),
            agents = emptyList(),

            // DIGITAL & PHYSICAL
            access = accessPhysical + accessDigital,

            // DIGITAL
            accessDigital = accessDigital,
            durationDigital = Extract.listOfStrings(digitalObject?.get("ebucore:duration")),
            colourDigital = Extract.listOfStrings(digitalObject?.get("rdau:P60558")),
            digitalObjectNote = Extract.extractLanguageContainer("rico:descriptiveNote", digitalObject?.get("rico:descriptiveNote")),
            locator = Extract.listOfStrings(digitalObject?.get("ebucore:locator")),
            usageConditionsDigital = Extract.extractLanguageContainer("rico:conditionsOfUse", digitalObject?.get("rico:conditionsOfUse")),
            usageDigital = usageDigital,
            usageDigitalGroup = usageDigital.map { ReuseStatementMap.getValue(it) },

            // PHYSICAL
            accessPhysical = accessPhysical,
            durationPhysical = Extract.listOfStrings(physicalObject?.get("ebucore:duration")),
            colourPhysical = Extract.extractLanguageContainer("rdau:P60558", physicalObject?.get("rdau:P60558")),
            physicalCharacteristics = Extract.extractLanguageContainer("rico:physicalCharacteristics", physicalObject?.get("rico:physicalCharacteristics")),
            physicalObjectNote = Extract.extractLanguageContainer("rico:descriptiveNote", physicalObject?.get("rico:descriptiveNote")),
            usageConditionsPhysical = Extract.extractLanguageContainer("rico:conditionsOfUse", physicalObject?.get("rico:conditionsOfUse")),
            usagePhysical = usagePhysical,
            callNumber = Extract.typedEntityByType(physicalIdentifiers, "rico:type", "callNumber", "rico:identifier").flatMap { it.toList() },
            format = format
        )
    }
}