To mitigate brute-force attacks against GitLab accounts, we are moving all logins to edu-ID. We would like to remind you to link your account with your edu-ID. After November 30, 2021, login will be possible only via edu-ID. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one by following the guide here.

Kind regards

This Server has been upgraded to GitLab release 14.2.6

Unverified Commit d7658413 authored by Sebastian Schüpbach's avatar Sebastian Schüpbach
Browse files

fix several small bugs


Signed-off-by: Sebastian Schüpbach's avatarSebastian Schüpbach <sebastian.schuepbach@unibas.ch>
parent c7e2dd06
Pipeline #17555 passed with stages
in 4 minutes and 5 seconds
......@@ -18,10 +18,6 @@
package org.memobase
import java.io.StringReader
import java.io.StringWriter
import java.net.MalformedURLException
import java.net.URL
import org.apache.jena.rdf.model.Model
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.Resource
......@@ -33,13 +29,16 @@ import org.apache.logging.log4j.LogManager
import org.memobase.rdf.EBUCORE
import org.memobase.rdf.RDF
import org.memobase.rdf.RICO
import org.memobase.reports.OembedResponse
import org.memobase.reports.ReportMessages
import org.memobase.reports.ReportStatus
import org.memobase.settings.SettingsLoader
import org.memobase.sftp.SftpClient
import settings.HeaderExtractionTransformSupplier
import settings.HeaderMetadata
import java.io.StringReader
import java.io.StringWriter
import java.net.MalformedURLException
import java.net.URL
class KafkaTopology(private val settings: SettingsLoader) {
private val appSettings = settings.appSettings
......@@ -69,7 +68,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
log.warn("Invalid input data. Check mapper service processing.")
Report(
key,
ReportStatus.failure,
ReportStatus.fatal,
generalFailureMessage = "Invalid input data. Check mapper service processing."
).toJson()
}
......@@ -82,8 +81,8 @@ class KafkaTopology(private val settings: SettingsLoader) {
createRecord(
value,
key,
ReportStatus.failure,
generalMessage = "No digital object resource present in model."
ReportStatus.warning,
generalFailureMessage = "No digital object resource present in model."
)
} else {
createRecord(value, key, ReportStatus.success)
......@@ -91,32 +90,51 @@ class KafkaTopology(private val settings: SettingsLoader) {
}
.mapValues { key, value ->
val recordResource = getRecordResource(value.second)
if (recordResource == null) {
updateRecord(value, ReportStatus.failure, generalMessage = "No record resource present in model.")
} else if (getOriginalIdentifier(recordResource) == null) {
updateRecord(value, ReportStatus.failure, generalMessage = ReportMessages.noOriginalIdentifier(key))
} else {
value
when {
recordResource == null -> {
updateRecord(
value,
ReportStatus.fatal,
generalMessage = "No record resource present in model."
)
}
getOriginalIdentifier(recordResource) == null -> {
updateRecord(
value,
ReportStatus.fatal,
generalMessage = ReportMessages.noOriginalIdentifier(key)
)
}
else -> {
value
}
}
}
.branch(
Predicate { _, value -> value.third.status == "FAILURE" },
Predicate { _, value -> value.third.status == ReportStatus.fatal },
Predicate { _, value -> value.third.status == ReportStatus.warning },
Predicate { _, _ -> true }
)
requiredFieldsAvailable[0]
.mapValues { _, value ->
log.warn("Record contains faulty data: ${value.third.digitalObjectMessage}. Abort processing of message")
value.third.toJson()
}
.to(reportingTopic)
requiredFieldsAvailable[1]
.mapValues { _, value ->
log.warn("Record contains faulty data: ${value.third.digitalObjectMessage}")
value.third.toJson()
}
.to(reportingTopic)
// TODO: To be discussed: Should messages with these failures eventually be forwarded?
requiredFieldsAvailable[0]
requiredFieldsAvailable[1]
.mapValues { value -> serializeModel(value.first.first) }
.to(settings.outputTopic)
val hasLocatorBranch = requiredFieldsAvailable[1]
val hasLocatorBranch = requiredFieldsAvailable[2]
.mapValues { readOnlyKey, value ->
addThumbnailSftpLocatorToModel(
readOnlyKey,
......@@ -131,7 +149,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
val updateDigitalObjects = hasLocatorBranch[0]
.mapValues { readOnlyKey, value ->
val enrichedModel = addMediaSftpLocatorToModel(readOnlyKey, value)
if (enrichedModel.third.status == "FAILURE") {
if (enrichedModel.third.status == ReportStatus.warning) {
log.warn("A problem enriching the digital object occurred: ${enrichedModel.third.digitalObjectMessage}")
}
enrichedModel
......@@ -166,25 +184,33 @@ class KafkaTopology(private val settings: SettingsLoader) {
when {
isNoValidUrl(locator) -> {
log.warn("No valid locator url found for ${value.third.id}")
return updateRecord(value, ReportStatus.failure, thumbnailMessage = "no valid locator url")
return updateRecord(value, ReportStatus.warning, thumbnailMessage = "no valid locator url")
}
RemoteResourceHandler.isVimeoUrl(locator) -> {
log.info("Trying to download thumbnail file on vimeo for ${value.third.id}")
this.previewImageHandler.getFromVimeo(locator)
?: return updateRecord(
val thumbnailHandler = this.previewImageHandler.getFromVimeo(locator)
if (thumbnailHandler == null) {
log.warn("Download for ${value.third.id} failed!")
return updateRecord(
value,
ReportStatus.failure,
ReportStatus.warning,
thumbnailMessage = "couldn't fetch vimeo thumbnail"
)
}
thumbnailHandler
}
RemoteResourceHandler.isYoutubeUrl(locator) -> {
log.info("Trying to download thumbnail file on youtube for ${value.third.id}")
this.previewImageHandler.getFromYoutube(locator)
?: return updateRecord(
val thumbnailHandler = this.previewImageHandler.getFromYoutube(locator)
if (thumbnailHandler == null) {
log.warn("Download for ${value.third.id} failed!")
return updateRecord(
value,
ReportStatus.failure,
thumbnailMessage = "couldn't fetch youtube thumbail"
ReportStatus.warning,
thumbnailMessage = "couldn't fetch youtube thumbnail"
)
}
thumbnailHandler
}
else -> {
log.debug("Won't fetch thumbnail file for ${value.third.id} because no youtube/vimeo resource")
......@@ -194,11 +220,15 @@ class KafkaTopology(private val settings: SettingsLoader) {
thumbnailMessage = "no additional thumbnails fetched"
)
}
}.let {
val enrichedValue = addDimensionsToDigitalObject(value, it.first)
return it.second?.let { locator ->
addLocalThumbnail(enrichedValue, recordResource, digitalObjectResource, locator)
} ?: enrichedValue
}.let { h ->
val enrichedValue = addDimensionsToDigitalObject(value, h.first)
val filePath = h.second
return if (filePath != null) {
addLocalThumbnail(enrichedValue, recordResource, digitalObjectResource, filePath)
} else {
log.warn("No thumbnail url available for ${value.third.id}")
enrichedValue
}
}
}
}
......@@ -234,7 +264,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
log.warn("Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}")
return updateRecord(
value,
ReportStatus.failure,
ReportStatus.warning,
thumbnailMessage = "upload of youtube / vimeo thumbnail to sFTP server failed"
)
}
......@@ -335,7 +365,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
?: return if (type == Constant.thumbnailFolderName) {
updateRecord(data, ReportStatus.success, thumbnailMessage = "no local thumbnails available")
} else {
updateRecord(data, ReportStatus.failure, digitalObjectMessage = ReportMessages.reportFailure(key, type))
updateRecord(data, ReportStatus.warning, digitalObjectMessage = ReportMessages.reportFailure(key, type))
}
return if (type == Constant.mediaFolderName) {
addLocatorToDigitalObjectResource(data.first.first, link, digitalObjectResource)
......@@ -417,14 +447,14 @@ class KafkaTopology(private val settings: SettingsLoader) {
value: Pair<Pair<Model, HeaderMetadata>, List<Resource>>,
messageId: String,
status: String,
generalMessage: String = "",
generalFailureMessage: String = "",
digitalObjectMessage: String = "",
thumbnailMessage: String = ""
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
val report = Report(
messageId,
status,
generalFailureMessage = generalMessage,
generalFailureMessage = generalFailureMessage,
digitalObjectMessage = digitalObjectMessage,
thumbnailMessage = thumbnailMessage
)
......
package org.memobase.reports
package org.memobase
import com.beust.klaxon.Klaxon
......
package org.memobase
import org.apache.logging.log4j.LogManager
import org.memobase.exceptions.SftpClientException
import org.memobase.sftp.SftpClient
import java.io.ByteArrayOutputStream
import java.io.FileNotFoundException
import java.io.FileOutputStream
......@@ -10,10 +13,6 @@ import java.net.URLEncoder
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
import org.apache.logging.log4j.LogManager
import org.memobase.exceptions.SftpClientException
import org.memobase.reports.OembedResponse
import org.memobase.sftp.SftpClient
/**
* Queries oembed API for videos on Vimeo or Youtube and downloads poster images
......@@ -22,6 +21,7 @@ class RemoteResourceHandler(private val sftpClient: SftpClient) {
private val log = LogManager.getLogger("MediaLinker")
companion object {
private val log = LogManager.getLogger("MediaLinker")
/**
* Checks if URL points to Vimeo
*
......@@ -55,6 +55,7 @@ class RemoteResourceHandler(private val sftpClient: SftpClient) {
}
}
} catch (e: FileNotFoundException) {
log.warn("Couldn't find oembed object on $urlAsString. Is this resource still available?")
return null
}
val result = String(outputStream.toByteArray())
......@@ -125,7 +126,6 @@ class RemoteResourceHandler(private val sftpClient: SftpClient) {
* Get embedding information and preview image from Vimeo
*
* @param videoURL URL of video
* @param width Width of preview image
*
* @return Pair with Oembed object and path to local file
*/
......
......@@ -2,5 +2,7 @@ package org.memobase.reports
/**
 * String constants for the status of a processing report.
 *
 * NOTE(review): this listing comes from a diff hunk, so it may show both
 * removed and added constants side by side - confirm against the actual file
 * which of them exist in the current revision.
 */
object ReportStatus {
// Status value for a step that completed without problems.
const val success = "SUCCESS"
// Generic failure status; presumably superseded by `warning` / `fatal` - TODO confirm.
const val failure = "FAILURE"
// NOTE(review): no usage of `ignore` is visible here; semantics to be confirmed.
const val ignore = "IGNORE"
// Status value for a problem that is reported as a warning.
const val warning = "WARNING"
// Status value for a problem that is reported as fatal.
const val fatal = "FATAL"
}
......@@ -18,12 +18,6 @@
package org.memobase
import com.beust.klaxon.Klaxon
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.charset.Charset
import java.nio.file.Paths
import java.util.stream.Stream
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.riot.Lang
import org.apache.jena.riot.RDFDataMgr
......@@ -41,6 +35,12 @@ import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import org.memobase.rdf.NS
import org.memobase.testing.EmbeddedSftpServer
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.charset.Charset
import java.nio.file.Paths
import java.util.stream.Stream
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestKafkaTopology {
......@@ -172,7 +172,7 @@ class TestKafkaTopology {
2,
"test1.yml",
"https://memobase.ch/record/test-institution-id-MEI_49884",
"https://memobase.ch/record/test-institution-id-MEI_49884"
""
),
TestParams(
4,
......
{
"id": "https://memobase.ch/record/test-institution-id-MEI_49884",
"status": "FAILURE",
"status": "FATAL",
"message": "The record https://memobase.ch/record/test-institution-id-MEI_49884 does not contain a identifier with rico:type 'original'!"
}
\ No newline at end of file
<https://memobase.ch/digital/BAZ-MEI_49885-0> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://example.org" .
<https://memobase.ch/digital/BAZ-MEI_49885-0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
<https://memobase.ch/digital/BAZ-MEI_49885-0> <https://www.ica.org/standards/RiC/ontology#type> "digitalObject" .
<https://memobase.ch/physical/BAZ-MEI_49885-0> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
<https://memobase.ch/physical/BAZ-MEI_49885-0> <https://www.ica.org/standards/RiC/ontology#type> "physicalObject" .
<https://memobase.ch/record/test-institution-id-MEI_49884> <http://memobase.ch/internal/isPublished> "false"^^<http://www.w3.org/2001/XMLSchema#boolean> .
<https://memobase.ch/record/test-institution-id-MEI_49884> <http://rdaregistry.info/Elements/u/P60451> <https://memobase.ch/institution/mrv> .
<https://memobase.ch/record/test-institution-id-MEI_49884> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Record> .
<https://memobase.ch/record/test-institution-id-MEI_49884> <https://www.ica.org/standards/RiC/ontology#hasInstantiation> <https://memobase.ch/digital/BAZ-MEI_49885-1> .
<https://memobase.ch/record/test-institution-id-MEI_49884> <https://www.ica.org/standards/RiC/ontology#heldBy> <https://memobase.ch/institution/test-record-set-id> .
<https://memobase.ch/record/test-institution-id-MEI_49884> <https://www.ica.org/standards/RiC/ontology#isPartOf> <https://memobase.ch/recordSet/test-institution-id> .
<https://memobase.ch/record/test-institution-id-MEI_49884> <https://www.ica.org/standards/RiC/ontology#type> "Foto" .
\ No newline at end of file
......@@ -68,6 +68,26 @@ mbr:Tanzarchiv-42858-43349
rico:title "Additional Tones" ;
rico:type "Video" .
_:b1 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b1 ;
rico:name "Christophe Calpini"
] ;
rico:name "Komponisten / Compositeur" ;
rico:type "contributor" .
_:b2 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b2 ;
rico:name "Katarzyna Gdaniec (Choreograf / Chorégraphe)"
] ;
rico:name "Author" ;
rico:type "creator" .
mbdo:Tanzarchiv-42858-43349-1
a rico:Instantiation ;
ebucore:height "360" ;
......@@ -83,16 +103,6 @@ mbdo:Tanzarchiv-42858-43349-1
mbpo:Tanzarchiv-42858-43349-1 ;
rico:type "digitalObject" .
_:b2 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b2 ;
rico:name "Katarzyna Gdaniec (Choreograf / Chorégraphe)"
] ;
rico:name "Author" ;
rico:type "creator" .
mbpo:Tanzarchiv-42858-43349-1
a rico:Instantiation ;
rdau:P60558 "Farbig / Couleur" ;
......@@ -100,14 +110,14 @@ mbpo:Tanzarchiv-42858-43349-1
rico:name "Träger / Support: Digitalisat (DIGG, DIGK) – Aufzeichnungsformat: H264"
] ;
rico:hasDerivedInstantiation mbdo:Tanzarchiv-42858-43349-1 ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "181-28-1; Träger: 181-28-DIGG-MAS" ;
rico:type "callNumber"
] ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "Tanzarchiv-42858-43349-1" ;
rico:type "main"
] ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "181-28-1; Träger: 181-28-DIGG-MAS" ;
rico:type "callNumber"
] ;
rico:instantiates mbr:Tanzarchiv-42858-43349 ;
rico:physicalCharacteristics "Seitenverhältnis: 16:9 normal" , "Bemerkung: 960x540 <br> Zustand / Etat pac: Intakt / Intact<br> Ton / Son: 48KHz2 Kanäle" ;
rico:regulatedBy [ a rico:Rule ;
......@@ -134,13 +144,3 @@ _:b0 a rico:CreationRelation ;
] ;
rico:name "Kompanie / Compagnie" ;
rico:type "contributor" .
_:b1 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b1 ;
rico:name "Christophe Calpini"
] ;
rico:name "Komponisten / Compositeur" ;
rico:type "contributor" .
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment