Ingester.kt 4.95 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * fedora-ingest-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
Thomas Bernhart's avatar
Thomas Bernhart committed
18
19
package org.memobase

20
21
22
23
import ch.memobase.exceptions.SftpClientException
import ch.memobase.fedora.FedoraClient
import ch.memobase.fedora.FedoraTransactionClient
import ch.memobase.fedora.RdfContentTypes
24
import ch.memobase.sftp.SftpClient
25
import java.io.File
26
import java.io.IOException
27
28
import java.io.StringWriter
import java.net.URI
29
import java.net.URISyntaxException
30
import org.apache.jena.rdf.model.Model
Jonas Waeber's avatar
Jonas Waeber committed
31
32
import org.apache.jena.riot.Lang
import org.apache.jena.riot.RDFDataMgr
Thomas Bernhart's avatar
Thomas Bernhart committed
33
34
import org.apache.logging.log4j.LogManager
import org.fcrepo.client.FcrepoOperationFailedException
35
import org.memobase.exceptions.MissingMimeTypeException
Thomas Bernhart's avatar
Thomas Bernhart committed
36

37
/**
 * Ingests a single message into Fedora: the record itself, its instantiations and
 * any binaries referenced through SFTP locators, all inside one Fedora transaction.
 *
 * @property sftpClient client used to open the binary files that are ingested.
 * @property fedoraClient client from which a new transaction is started for every ingest.
 * @property externalBaseUrl handed to [RdfHandler] together with the message content.
 */
class Ingester(
    private val sftpClient: SftpClient,
    private val fedoraClient: FedoraClient,
    private val externalBaseUrl: String
) {

    private val log = LogManager.getLogger("FedoraIngester")

    /**
     * Ingests the message [content] identified by [id].
     *
     * The record is serialized to N-Triples before the transaction is started. Inside the
     * transaction the steps run in this order: placeholders for referenced non-binary
     * resources, the record, the instantiations, the binaries (only when SFTP locators
     * exist), and finally the commit. The commit is only reached when none of the
     * preceding steps threw; `use` closes the transaction in either case.
     *
     * @param id message identifier, used for logging only.
     * @param content message payload that is parsed by [RdfHandler].
     */
    @Throws(
        FcrepoOperationFailedException::class,
        IOException::class,
        MissingMimeTypeException::class,
        SftpClientException::class,
        URISyntaxException::class
    )
    fun ingest(id: String, content: String) {
        log.info("Begin ingest of message with id $id.")
        val rdfHandler = RdfHandler(content, externalBaseUrl)
        val (recordUri, recordModel) = rdfHandler.getRecord()
        val data = toNTriples(recordModel)

        fedoraClient.startTransaction().use { transaction ->
            // Placeholders first, so that resources referenced by the record exist.
            rdfHandler.getReferencedNonBinaryResources().forEach { referenced ->
                log.info("Creating placeholder for resource $referenced.")
                transaction.createPlaceholder(URI(referenced))
                log.info("Created placeholder for resource $referenced.")
            }

            // Record, then instantiations, then binaries.
            log.info("Ingesting record $recordUri.")
            transaction.createOrUpdateRdfResource(URI(recordUri), data, RdfContentTypes.NTRIPLES)
            log.info("Ingested record $recordUri.")

            ingestInstantiations(rdfHandler.getInstantiations(), transaction)

            val sftpLocators = rdfHandler.getSftpLocators()
            if (sftpLocators.isNotEmpty()) {
                ingestBinaries(sftpLocators, rdfHandler, transaction)
            }

            transaction.commit()
        }
        log.info("End ingest of message with id $id.")
    }

    /**
     * Writes every instantiation to Fedora as an N-Triples RDF resource.
     *
     * @param instantiations pairs of (resource URI, RDF model) to create or update.
     * @param transaction open transaction the resources are written through.
     */
    @Throws(FcrepoOperationFailedException::class, IOException::class, URISyntaxException::class)
    private fun ingestInstantiations(instantiations: List<Pair<String, Model>>, transaction: FedoraTransactionClient) {
        for ((instantiationUri, model) in instantiations) {
            val instantiationData = toNTriples(model)
            log.info("Ingesting instantiation $instantiationUri.")
            transaction.createOrUpdateRdfResource(
                URI(instantiationUri),
                instantiationData,
                RdfContentTypes.NTRIPLES
            )
            log.info("Ingested instantiation $instantiationUri.")
        }
    }

    /**
     * Streams the file behind each SFTP locator into Fedora as a binary resource.
     *
     * Locators whose path is `null` are skipped without any action.
     *
     * @param sftpLocators pairs of (digital instantiation URL, SFTP file path or `null`).
     * @param rdfHandler source of the mime type for each digital instantiation.
     * @param transaction open transaction the binaries are written through.
     */
    @Throws(
        FcrepoOperationFailedException::class,
        IOException::class,
        MissingMimeTypeException::class,
        SftpClientException::class,
        URISyntaxException::class
    )
    private fun ingestBinaries(
        sftpLocators: List<Pair<String, String?>>,
        rdfHandler: RdfHandler,
        transaction: FedoraTransactionClient
    ) {
        for ((digitalInstantiationUrl, path) in sftpLocators) {
            if (path == null) continue

            // The stream is opened before the mime type lookup and closed by `use`
            // whether or not the upload succeeds.
            sftpClient.open(File(path)).use { stream ->
                val binaryUri = "$digitalInstantiationUrl/${Service.BINARY_FILE_URI_PATH}"
                val mimeType = rdfHandler.getMimeType(digitalInstantiationUrl)
                log.info("Ingesting binary $binaryUri with mime type $mimeType.")
                transaction.createOrUpdateBinaryResource(URI(binaryUri), stream, mimeType)
            }
        }
    }

    /** Serializes [model] as N-Triples and returns the resulting text. */
    private fun toNTriples(model: Model): String =
        StringWriter().also { writer -> RDFDataMgr.write(writer, model, Lang.NTRIPLES) }.toString()
}