In order to mitigate brute-force attacks against GitLab accounts, we are moving all logins to edu-ID. We would like to remind you to link your account with your edu-ID. After November 30, 2021, login will be possible only via edu-ID. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one by following this guide here.

kind regards

This Server has been upgraded to GitLab release 14.2.6

RdfTransformer.kt 18.3 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Drupal Sync Service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

Jonas Waeber's avatar
Jonas Waeber committed
19
20
package org.memobase

Jonas Waeber's avatar
Jonas Waeber committed
21
import ch.memobase.rdf.DC
22
23
24
25
26
27
28
import ch.memobase.rdf.MB
import ch.memobase.rdf.NS
import ch.memobase.rdf.RDA
import ch.memobase.rdf.RDF
import ch.memobase.rdf.RICO
import ch.memobase.rdf.SCHEMA
import ch.memobase.rdf.WD
Jonas Waeber's avatar
Jonas Waeber committed
29
import ch.memobase.rdf.XSD
30
import java.util.Properties
Jonas Waeber's avatar
Jonas Waeber committed
31
import org.apache.jena.datatypes.RDFDatatype
32
33
34
import org.apache.jena.rdf.model.Literal
import org.apache.jena.rdf.model.Model
import org.apache.jena.rdf.model.ModelFactory
Jonas Waeber's avatar
Jonas Waeber committed
35
import org.apache.jena.rdf.model.Property
36
37
import org.apache.jena.rdf.model.Resource
import org.apache.jena.rdf.model.ResourceFactory
Jonas Waeber's avatar
Jonas Waeber committed
38
import org.apache.logging.log4j.LogManager
Jonas Waeber's avatar
Jonas Waeber committed
39
40
import org.memobase.model.Address
import org.memobase.model.Institution
Jonas Waeber's avatar
Jonas Waeber committed
41
import org.memobase.model.Link
Jonas Waeber's avatar
Jonas Waeber committed
42
import org.memobase.model.RecordSet
43
import org.memobase.model.RichText
Jonas Waeber's avatar
Jonas Waeber committed
44

45
46
/**
 * Builds Apache Jena RDF models out of [Institution] and [RecordSet] inputs.
 *
 * @param properties configuration; the value stored under
 * [Util.languageSourceFilePathPropertyName] is read once at construction time and
 * passed to [Util.loadLanguages].
 */
class RdfTransformer(properties: Properties) {
    // Log4j logger registered under the name "RdfTransformer".
    private val log = LogManager.getLogger("RdfTransformer")
Jonas Waeber's avatar
Jonas Waeber committed
47

48
49
50
    // Canton lookup; queried with an address' administrative_area.
    private val cantons = Util.getCantons()
    // Municipality lookup; queried with the trimmed postal code of an address.
    private val municipalities = Util.getMunicipalities()
    // Language lookup; queried with the metadata language codes of a record set.
    // Loaded from the configured property value (presumably a file path, going by the property name).
    private val languages = Util.loadLanguages(properties.getProperty(Util.languageSourceFilePathPropertyName))
Jonas Waeber's avatar
Jonas Waeber committed
51
52
53
54
55

    /**
     * Converts an [Institution] into a `rico:CorporateBody` description.
     *
     * A fresh default model is created for every call and the institution resource is
     * minted as `NS.mbcb + field_memobase_id`.
     *
     * @param input the institution to convert.
     * @return the URI of the institution resource paired with the model holding its statements.
     */
    fun createInstitution(input: Institution): Pair<String, Model> {
        val model = ModelFactory.createDefaultModel()
        val institution = model.createResource(NS.mbcb + input.field_memobase_id)

        institution.addProperty(RDF.type, RICO.CorporateBody)
        institution.addProperty(RICO.type, "memobaseInstitution")
        institution.addLiteral(MB.isPublished, input.status)

        // Identifiers: the current id is always written, the old one only when present.
        institution.addProperty(RICO.identifiedBy, addIdentifier(model, "main", input.field_memobase_id))
        input.field_old_memobase_id?.let { oldId ->
            institution.addProperty(RICO.identifiedBy, addIdentifier(model, "oldMemobase", oldId))
        }

        // Record sets held by this institution.
        for (recordSetId in input.recordset_ids) {
            institution.addProperty(RICO.isHolderOf, NS.mbrs + recordSetId)
        }

        // Name in German, French and Italian.
        institution.addProperty(RICO.name, langLiteral(input.title, "de"))
        institution.addProperty(RICO.name, langLiteral(input.title_fr, "fr"))
        institution.addProperty(RICO.name, langLiteral(input.title_it, "it"))

        // Description
        val descriptions = listOf(input.field_text, input.field_text_fr, input.field_text_it)
        addRichTextLiteralIfNotNull(institution, descriptions, RICO.descriptiveNote)

        // One location node per address.
        for (address in input.field_address) {
            institution.addProperty(RICO.hasLocation, generateLocationResource(model, address))
        }

        // Optional contact details and links.
        input.field_isil?.let { isil -> institution.addProperty(WD.isil, literal(isil)) }
        input.field_email?.let { email -> institution.addProperty(WD.emailAddress, literal(email)) }
        input.field_website?.let { website -> institution.addProperty(WD.website, literal(website.uri)) }
        input.field_link_archive_catalog?.let { catalog ->
            institution.addProperty(WD.onlineArchive, literal(catalog.uri))
        }

        // Only the last path segment of each type value is appended to the NS.wd namespace.
        for (typeValue in input.field_institution_types) {
            val localName = typeValue.substringAfterLast("/")
            institution.addProperty(WD.typeOfInstitution, model.createResource(NS.wd + localName))
        }

        input.computed_teaser_image_url?.let { imageUrl -> institution.addLiteral(WD.image, imageUrl) }

        return Pair(institution.uri, model)
    }

Jonas Waeber's avatar
Jonas Waeber committed
107
    /**
     * Converts a [RecordSet] into a `rico:RecordSet` description.
     *
     * A fresh default model is created for every call and the record set resource is minted
     * as `NS.mbrs + field_memobase_id`. The numbered comments in the body are carried over
     * from the original code; they appear to follow the sections of the source form.
     *
     * All identifiers (main, old Memobase, original id, call number) are linked to the
     * record set through `rico:identifiedBy`.
     *
     * @param input the record set to convert.
     * @return the URI of the record set resource paired with the model holding its statements.
     */
    fun createRecordSet(input: RecordSet): Pair<String, Model> {
        val model = ModelFactory.createDefaultModel()
        val resource = model.createResource(NS.mbrs + input.field_memobase_id)
        resource.addProperty(RDF.type, RICO.RecordSet)
        // Publication status
        resource.addLiteral(MB.isPublished, input.status)

        // Description
        addRichTextLiteralIfNotNull(
            resource,
            listOf(input.field_text, input.field_text_fr, input.field_text_it),
            RICO.descriptiveNote
        )

        // 0.1 Title (Memobase)
        addTitle(resource, model, "main", listOf(input.title, input.title_fr, input.title_it))
        // + convenience label on the resource directly.
        resource.addProperty(RICO.title, langLiteral(input.title, "de"))
        resource.addProperty(RICO.title, langLiteral(input.title_fr, "fr"))
        resource.addProperty(RICO.title, langLiteral(input.title_it, "it"))

        // 0.2.1 Thumbnail image
        input.computed_teaser_image_url?.let { imageUrl ->
            resource.addProperty(WD.image, literal(imageUrl))
        }

        // 1.1 Content
        addRichTextLiteralIfNotNull(
            resource,
            listOf(input.field_content, input.field_content_fr, input.field_content_it),
            RICO.scopeAndContent
        )

        // 1.2 Period of creation
        // is expected to always be a normalized value YYYY/YYYY.
        input.field_time_period?.let { period ->
            val date = model.createResource()
            date.addProperty(RDF.type, RICO.DateRange)
            date.addProperty(RICO.normalizedDateValue, period)
            resource.addProperty(RICO.isAssociatedWithDate, date)
        }

        // 1.3 Language
        addLiteralIfNotNull(
            resource,
            listOf(input.field_language_de, input.field_language_fr, input.field_language_it),
            RDA.hasLanguageOfResource
        )

        // 1.4 Access Memobase
        // Why is this implemented as a list?
        /*
        addIfNotNull(
            resource,
            listOf(input.field_access_memobase, input.field_access_memobase_fr, input.field_access_memobase_it),
            RICO.conditionsOfAccess
        )
         */
        // 2.1 Context
        addRichTextLiteralIfNotNull(
            resource,
            listOf(input.field_context, input.field_context_fr, input.field_context_it),
            RICO.history
        )
        // 3.1 Title
        // Original title of the record set
        addTitle(
            resource,
            model,
            "original",
            listOf(input.field_original_title, input.field_original_title_fr, input.field_original_title_it)
        )
        // 3.2 Extent -> recordResourceExtent
        addRichTextLiteralIfNotNull(
            resource,
            listOf(input.field_scope, input.field_scope_fr, input.field_scope_it),
            RICO.recordResourceExtent
        )
        // 3.3 Selection / completeness
        addRichTextLiteralIfNotNull(
            resource,
            listOf(input.field_selection_de, input.field_selection_fr, input.field_selection_it),
            RICO.integrity
        )
        // 3.4 Information on cataloguing
        addRichTextLiteralIfNotNull(
            resource,
            listOf(
                input.field_info_on_development_de,
                input.field_info_on_development_fr,
                input.field_info_on_development_it
            ),
            DC.conformsTo
        )
        // 3.5 Language of the metadata records
        input.field_metadata_language_codes.forEach { code ->
            // rico:hasLanguage metadata
            resource.addProperty(RICO.hasLanguage, addLanguage(model, code))
        }
        // 3.6 Rights
        addRichTextLiteralIfNotNull(
            resource,
            listOf(
                input.field_rights_de,
                input.field_rights_fr,
                input.field_rights_it
            ),
            RICO.conditionsOfUse
        )
        // 3.7 Original ID
        // The identifier node has to be linked to the record set; a blank node that is
        // only created in the model cannot be reached from the resource.
        input.field_original_id?.let { originalId ->
            resource.addProperty(RICO.identifiedBy, addIdentifier(model, "original", originalId))
        }
        // 3.8 Original call number
        input.field_original_shelf_mark?.let { shelfMark ->
            resource.addProperty(RICO.identifiedBy, addIdentifier(model, "callNumber", shelfMark))
        }
        // 3.9 Description (text)
        // 3.9.1 Description (link)
        addOriginalRecordSetLink(
            model,
            resource,
            listOf(
                input.field_original_description,
                input.field_original_description_fr,
                input.field_original_description_it
            )
        )

        // 4.1 Access
        // Why is this implemented as a list?
        /*
        addIfNotNull(
            resource,
            listOf(input.field_access, input.field_access_fr, input.field_access_it),
            RICO.conditionsOfAccess
        )
         */
        // 4.2 Responsible institution (original)
        addRelatedInstitution(model, resource, "original", input.field_resp_institution_original)
        // 4.3 Responsible institution (master)
        addRelatedInstitution(model, resource, "master", input.field_resp_institution_master)
        // 4.4 Responsible institution (access)
        addRelatedInstitution(model, resource, "access", input.field_resp_institution_access)

        // Hard to deal with because these are lists. How to ensure that the correct translations are attached to each other?
        // sometimes two documents have a different language each but no translations.
        // 5.1 Project (title)
        // 5.2 Project (link)
        // rdau:P60451 (RDA:hasSponsoringAgentOfResource)
        // ---
        // rico:CorporateBody
        // rico:type "memoriavProject"
        // rico:title
        // schema:sameAs

        // 5.3 Related record sets (title)
        // 5.4 Related record sets (link)
        // rico:isRecordResourceAssociatedWithRecordResource
        // ---
        // rico:RecordSet
        // rico:title
        // schema:sameAs

        // 5.5 Publications (title)
        // 5.6 Publications (link)
        // rico:isSubjectOf
        // ---
        // rico:Record
        // rico:title
        // schema:sameAs

        // 5.7 Documents (title)
        // 5.8 Documents (link)
        // rico:isRecordResourceAssociatedWithRecordResource
        // ---
        // rico:Record
        // rico:title
        // schema:sameAs

        // 6.1 Data transfer
        addRichTextLiteralIfNotNull(
            resource,
            listOf(input.field_data_transfer, input.field_data_transfer_fr, input.field_data_transfer_it),
            RICO.descriptiveNote
        )
        // 6.2 Date of transfer into Memobase
        input.field_transfer_date?.let { transferDate ->
            // Only the part in front of the first 'T' is kept and typed as XSD.date.
            val dateOnly = transferDate.substringBefore("T")
            resource.addLiteral(RICO.publicationDate, model.createTypedLiteral(dateOnly, XSD.date))
        }
        // 6.3 Date of last update in Memobase
        val modificationDate = model.createTypedLiteral(Util.now, XSD.dateTime)
        resource.addLiteral(RICO.modificationDate, modificationDate)
        // 6.4 Memobase ID
        resource.addProperty(RICO.identifiedBy, addIdentifier(model, "main", input.field_memobase_id))

        // 8.1 Supported by Memoriav
        resource.addProperty(RDA.hasSponsoringAgentOfResource, model.createResource(Util.memoriavUri))
        // 8.2 Institution
        input.field_institution.forEach { institutionId ->
            resource.addProperty(RICO.heldBy, NS.mbcb + institutionId)
        }
        // 9.3 Old Memobase ID
        input.field_old_memobase_id?.let { oldId ->
            resource.addProperty(RICO.identifiedBy, addIdentifier(model, "oldMemobase", oldId))
        }
        return Pair(resource.uri, model)
    }

    /**
     * Creates an anonymous `rico:Identifier` node in [model].
     *
     * The node is only created here; linking it to another resource is left to the caller.
     *
     * @param model the model the node is created in.
     * @param type written as `rico:type` literal.
     * @param value written as `rico:identifier` literal.
     * @return the new identifier node.
     */
    private fun addIdentifier(model: Model, type: String, value: String): Resource =
        model.createResource().apply {
            addProperty(RDF.type, RICO.Identifier)
            addProperty(RICO.type, literal(type))
            addProperty(RICO.identifier, literal(value))
        }

    /**
     * Creates an anonymous `rico:Language` node of type "metadata" in [model].
     *
     * When [value] is found in the language lookup, the German, French and Italian names and
     * a `schema:sameAs` literal with the entry's id are written. Otherwise [value] itself
     * is used as the untagged name.
     *
     * @param model the model the node is created in.
     * @param value the key looked up in the language table.
     * @return the new language node.
     */
    private fun addLanguage(model: Model, value: String): Resource {
        val node = model.createResource()
        node.addProperty(RDF.type, RICO.Language)
        node.addProperty(RICO.type, literal("metadata"))

        val known = languages[value]
        if (known != null) {
            node.addProperty(RICO.name, langLiteral(known.de, "de"))
            node.addProperty(RICO.name, langLiteral(known.fr, "fr"))
            node.addProperty(RICO.name, langLiteral(known.it, "it"))
            node.addProperty(SCHEMA.sameAs, literal(known.id))
        } else {
            node.addProperty(RICO.name, literal(value))
        }
        return node
    }

Jonas Waeber's avatar
Jonas Waeber committed
348
349
350
    /**
     * Attaches an anonymous `rico:Title` node to [resource] via `rico:hasTitle`.
     *
     * Nothing is added when every entry of [titles] is null. The language tag of each
     * title is derived from its position in the list (see [getLanguage]).
     *
     * @param resource the resource receiving the title node.
     * @param model the model the title node is created in.
     * @param type written as `rico:type` literal on the title node.
     * @param titles the title translations; null entries are skipped.
     */
    private fun addTitle(resource: Resource, model: Model, type: String, titles: List<String?>) {
        if (titles.none { it != null }) return

        val titleNode = model.createResource()
        titleNode.addProperty(RDF.type, RICO.Title)
        titleNode.addProperty(RICO.type, literal(type))
        for ((position, text) in titles.withIndex()) {
            if (text == null) continue
            titleNode.addProperty(RICO.title, langLiteral(text, getLanguage(position)))
        }
        resource.addProperty(RICO.hasTitle, titleNode)
    }

361
362
    /**
     * Creates an anonymous `rico:Place` node describing [address].
     *
     * The first address line is split at its last space: the trailing token is written as
     * street number, everything in front of it as street. Two further anonymous place
     * nodes (canton and municipality) are attached via `WD.adminUnit`.
     *
     * NOTE(review): the split assumes the "street number" layout. An address line without a
     * space ends up with an empty street and the whole line as street number.
     *
     * @param model the model the nodes are created in.
     * @param address the address to describe.
     * @return the new location node.
     */
    private fun generateLocationResource(model: Model, address: Address): Resource {
        val location = model.createResource()

        val streetAddress = address.address_line1
        val secondAddressLine = address.address_line2
        val combinedStreetAddress = if (secondAddressLine.isNullOrEmpty()) {
            streetAddress
        } else {
            streetAddress + "\n" + secondAddressLine
        }
        val streetNumber = streetAddress.substringAfterLast(" ")
        // Strip only the trailing token. A plain replace() would also remove the same
        // characters wherever else they occur in the street name (e.g. "Rue du 23 Juin 23").
        val street = streetAddress.removeSuffix(streetNumber).trim()
        location.addProperty(WD.street, literal(street))
        location.addProperty(WD.streetNumber, literal(streetNumber))
        location.addProperty(WD.streetAddress, literal(combinedStreetAddress))

        val postalCode = address.postal_code.trim()

        location.addProperty(RDF.type, RICO.Place)
        location.addProperty(WD.postalCode, literal(postalCode))

        location.addProperty(WD.coordinates, literal(address.coordinates))

        val canton = model.createResource()
        canton.addProperty(RDF.type, RICO.Place)
        location.addProperty(WD.adminUnit, canton)
        val knownCanton = cantons[address.administrative_area]
        if (knownCanton != null) {
            canton.addProperty(RICO.name, langLiteral(knownCanton.de, "de"))
            canton.addProperty(RICO.name, langLiteral(knownCanton.fr, "fr"))
            canton.addProperty(RICO.name, langLiteral(knownCanton.it, "it"))
            canton.addProperty(SCHEMA.sameAs, knownCanton.id)
        } else {
            // Not expected to happen; make the fallback visible instead of silent.
            log.warn("No canton found for administrative area '${address.administrative_area}'.")
            canton.addProperty(RICO.name, literal("Unknown"))
        }
        canton.addProperty(RICO.type, literal("canton"))

        val municipality = model.createResource()
        municipality.addProperty(RDF.type, RICO.Place)
        location.addProperty(WD.adminUnit, municipality)
        val knownMunicipality = municipalities[postalCode]
        if (knownMunicipality != null) {
            municipality.addProperty(RICO.name, langLiteral(knownMunicipality.de, "de"))
            municipality.addProperty(RICO.name, langLiteral(knownMunicipality.fr, "fr"))
            municipality.addProperty(RICO.name, langLiteral(knownMunicipality.it, "it"))
            municipality.addProperty(SCHEMA.sameAs, knownMunicipality.id)
        } else {
            // Fall back to the locality given in the address.
            municipality.addProperty(RICO.name, literal(address.locality))
        }
        municipality.addProperty(RICO.type, "municipality")

        // country is currently hard coded to switzerland!
        location.addProperty(WD.country, WD.switzerland)
        return location
    }

Jonas Waeber's avatar
Jonas Waeber committed
421
422
    /** Builds a language-tagged literal from the trimmed [text], tagged with [language]. */
    private fun langLiteral(text: String, language: String): Literal {
        val trimmed = text.trim()
        return ResourceFactory.createLangLiteral(trimmed, language)
    }
423

Jonas Waeber's avatar
Jonas Waeber committed
424
    /** Builds a plain (untagged) literal from the trimmed [text]. */
    private fun literal(text: String): Literal {
        val trimmed = text.trim()
        return ResourceFactory.createPlainLiteral(trimmed)
    }
425

Jonas Waeber's avatar
Jonas Waeber committed
426
427
428
429
    /**
     * Adds the `value` of every non-null [RichText] in [field] to [resource] as a
     * language-tagged literal of [property].
     *
     * The language tag is derived from the entry's position in the list (see [getLanguage]).
     */
    private fun addRichTextLiteralIfNotNull(resource: Resource, field: List<RichText?>, property: Property) {
        for ((position, richText) in field.withIndex()) {
            if (richText == null) continue
            resource.addProperty(property, langLiteral(richText.value, getLanguage(position)))
        }
    }
Jonas Waeber's avatar
Jonas Waeber committed
432

Jonas Waeber's avatar
Jonas Waeber committed
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
    /**
     * Maps a list position to a language tag: 0 is "de", 1 is "fr", 2 is "it".
     * Every other index yields "un".
     */
    private fun getLanguage(index: Int): String =
        arrayOf("de", "fr", "it").getOrElse(index) { "un" }

    /**
     * Adds every non-null string in [fields] to [resource] as a language-tagged literal
     * of [property].
     *
     * The language tag is derived from the entry's position in the list (see [getLanguage]).
     */
    private fun addLiteralIfNotNull(resource: Resource, fields: List<String?>, property: Property) {
        for ((position, text) in fields.withIndex()) {
            if (text == null) continue
            resource.addProperty(property, langLiteral(text, getLanguage(position)))
        }
    }

    /**
     * Attaches an anonymous `rico:RecordSet` node to [resource] via `rico:hasSource`.
     *
     * Nothing is added when every entry of [links] is null. For each non-null link the
     * uri is written as language-tagged `schema:sameAs` literal and, when present, the
     * title as language-tagged `rico:title` literal. The language tag is derived from the
     * link's position in the list (see [getLanguage]).
     *
     * @param model the model the node is created in.
     * @param resource the resource receiving the source node.
     * @param links the link translations; null entries are skipped.
     */
    private fun addOriginalRecordSetLink(model: Model, resource: Resource, links: List<Link?>) {
        if (links.none { it != null }) return

        val source = model.createResource()
        source.addProperty(RDF.type, RICO.RecordSet)
        for ((position, link) in links.withIndex()) {
            if (link == null) continue
            source.addLiteral(SCHEMA.sameAs, langLiteral(link.uri, getLanguage(position)))
            val linkTitle = link.title
            if (linkTitle != null) {
                source.addLiteral(RICO.title, langLiteral(linkTitle, getLanguage(position)))
            }
        }
        resource.addProperty(RICO.hasSource, source)
    }

    /**
     * Adds one anonymous `rico:RecordResourceHoldingRelation` node per entry of [item].
     *
     * Each relation node carries [type] as `rico:type`, points to [resource] as its target
     * and is linked back from [resource].
     *
     * NOTE(review): the relation source is written as the string `NS.mbrs + id`, i.e. with
     * the namespace that is otherwise used for record sets. If the entries of [item] are
     * institution ids, `NS.mbcb` may have been intended. Confirm before changing.
     *
     * @param model the model the relation nodes are created in.
     * @param resource the record resource that is the target of the relations.
     * @param type the kind of relation, written as `rico:type`.
     * @param item the ids used to build the relation sources.
     */
    private fun addRelatedInstitution(model: Model, resource: Resource, type: String, item: List<String>) {
        for (id in item) {
            val relation = model.createResource()
            relation.addProperty(RDF.type, RICO.RecordResourceHoldingRelation)
            relation.addProperty(RICO.type, type)
            relation.addProperty(RICO.recordResourceHoldingRelationHasTarget, resource)
            relation.addProperty(RICO.recordResourceHoldingRelationHasSource, NS.mbrs + id)
            resource.addProperty(RICO.recordResourceOrInstantiationIsTargetOfRecordResourceHoldingRelation, relation)
        }
    }
Jonas Waeber's avatar
Jonas Waeber committed
477
}