Unverified Commit e2d7a847 authored by Sebastian Schüpbach's avatar Sebastian Schüpbach
Browse files

general refactoring

parent 703508e8
Pipeline #14169 passed with stages
in 12 minutes and 6 seconds
......@@ -17,14 +17,14 @@
~ along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
<Configuration status="debug" name="media-converter" packages="">
<Configuration status="info" name="media-converter" packages="">
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
<PatternLayout pattern="[%-5level] [%c{1}] %m%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="debug">
<Root level="info">
<AppenderRef ref="STDOUT"/>
</Root>
</Loggers>
......
......@@ -65,8 +65,8 @@ class DisseminationCopyHandler(audioDestPath: String, imageDestPath: String, vid
val destFile = Paths.get(audioDestPath, destId + ".mp4")
val snippetFile = Paths.get(audioDestPath, destId + "-intro." + Conversions.getFileTypeExtension(sourceFileType).get)
writeData(data, tempFilePath)
Transformations.audioToMp4(tempFilePath.toString, destFile.toString).get
Transformations.createAudioSnippet(tempFilePath.toString, snippetFile.toString, audioSnippetDuration)
MediaTransformations.audioToMp4(tempFilePath.toString, destFile.toString).get
MediaTransformations.createAudioSnippet(tempFilePath.toString, snippetFile.toString, audioSnippetDuration)
Files.delete(tempFilePath)
destFile
}
......@@ -83,7 +83,7 @@ class DisseminationCopyHandler(audioDestPath: String, imageDestPath: String, vid
val tempFilePath = Files.createTempFile("media-", "." + Conversions.getFileTypeExtension(sourceFileType).get)
val destFile = Paths.get(imageDestPath, destId + ".jp2")
writeData(data, tempFilePath)
Transformations.imageToJp2(tempFilePath.toString, destFile.toString).get
MediaTransformations.imageToJp2(tempFilePath.toString, destFile.toString).get
Files.delete(tempFilePath)
destFile
}
......
......@@ -29,7 +29,7 @@ import scala.util.Try
/**
* Contains functions used to transform specific media files
*/
object Transformations extends Logging {
object MediaTransformations extends Logging {
import sys.process._
......
......@@ -44,12 +44,14 @@ class RecordProcessor(fileHandler: DisseminationCopyHandler, fedoraClientWrapper
}
def process(record: ConsumerRecord[String, String]): ProcessOutcome = {
(for {
val test = (for {
kafkaMsg <- BinaryResourceMetadata.build(record.value(), externalBaseUrl)
fileWithMetadata <- fedoraClientWrapper.fetchBinaryResource(kafkaMsg.filePath)
} yield createProcessResult(kafkaMsg.id, kafkaMsg.eventType, fileWithMetadata.fileType, fileWithMetadata.data))
test
.recover {
case e: ResourceWithoutBinary => ProcessIgnore(record.key(), e.getMessage)
case e: NoLocalBinary => ProcessIgnore(record.key(), e.getMessage)
case e: NoDigitalObject => ProcessIgnore(record.key(), e.getMessage)
case e: Exception => ProcessFailure(record.key(), UnknownFileType, "", e)
}
}.get
......
......@@ -22,7 +22,7 @@ package ch.memobase.models
import ujson.{Str, Value}
import scala.collection.mutable.ArrayBuffer
import scala.util.{Failure, Success, Try}
import scala.util.{Success, Try}
/**
......@@ -48,45 +48,80 @@ object BinaryResourceMetadata {
* @param externalBaseUrl Base URL of resource used outside of Fedora
* @return
*/
def build(msg: String, externalBaseUrl: String): Try[BinaryResourceMetadata] = Try {
def build(msg: String, externalBaseUrl: String): Try[BinaryResourceMetadata] = {
val jsonldGraph = ujson.read(msg).obj("@graph").arr
buildKafkaMessage(jsonldGraph, externalBaseUrl) match {
case Some(km) => km
case None => throw new ResourceWithoutBinary("Resource contains no binary object")
}
extractBinaryResourceMetadata(jsonldGraph, externalBaseUrl)
}
private def setEventType(eventAsString: String): Event = eventAsString match {
private def chooseEventType(eventAsString: String): Event = eventAsString match {
case "Create" => Create
case "Update" => Update
case "Delete" => Delete
case s => UnknownEvent(s)
}
private def buildKafkaMessage(jsonldGraph: ArrayBuffer[Value], baseUrl: String): Option[BinaryResourceMetadata] = {
jsonldGraph.value
.withFilter(v => isDigitalBinaryObject(v.obj, baseUrl))
.map { o =>
BinaryResourceMetadata(
o("@id").str.substring(s"$baseUrl/digital/".length).replaceFirst("/binary", ""),
o("@id").str,
o("hasMimeType").str,
setEventType(o("eventType").str))
}
.headOption
//noinspection ScalaStyle
/**
 * Builds the metadata of the first digital object found in a JSON-LD graph.
 *
 * The resulting `Try` is a failure carrying
 *  - `NoDigitalObject` if no graph entry is a digital object,
 *  - `NoLocalBinary` if the digital object's locator is not below `baseUrl`,
 *  - `UnmanageableMediaFileType` if its mime type cannot be processed,
 *  - `NoEventType` if no record entry in the graph provides an event type,
 *  - `UnknownEventType` if the event type is not one of the known ones.
 * Any other exception raised while reading the JSON values is captured as well.
 *
 * @param jsonldGraph entries of the message's `@graph` array
 * @param baseUrl     base URL that local locators and ids start with
 * @return the extracted metadata, or the failure described above
 */
private def extractBinaryResourceMetadata(jsonldGraph: ArrayBuffer[Value], baseUrl: String): Try[BinaryResourceMetadata] = Try {
  val digitalObject = jsonldGraph.collectFirst { case entry if isDigitalObject(entry.obj) => entry.obj }
  digitalObject match {
    case None => throw new NoDigitalObject
    case Some(obj) if !isLocalRecord(obj, baseUrl) => throw new NoLocalBinary
    case Some(obj) if !isProcessableMimeType(obj) => throw new UnmanageableMediaFileType("Media file type unknown")
    case Some(obj) =>
      getEventType(jsonldGraph) match {
        case None => throw new NoEventType
        case Some(UnknownEvent(e)) => throw new UnknownEventType(s"Event type `$e` not known")
        case Some(eventType) =>
          BinaryResourceMetadata(
            // the id is whatever follows "<baseUrl>/digital/" in the object's @id
            obj("@id").str.substring(s"$baseUrl/digital/".length),
            obj("locator").str,
            obj("hasMimeType").str,
            eventType)
      }
  }
}
private def isDigitalBinaryObject(obj: ujson.Obj, internalBaseUrl: String): Boolean = {
isObjectWrapper(obj) {
id => id.startsWith(s"$internalBaseUrl/digital") && id.endsWith("/binary")
private def isDigitalObject(obj: ujson.Obj): Boolean = {
hasKeyValue(obj, "type") {
_ == "digitalObject"
}
}
private def isObjectWrapper(obj: ujson.Obj)(f: String => Boolean): Boolean = {
Try(obj.value("@id")) match {
case Success(id: Str) => f(id.value)
case Success(_) => false
case Failure(_) => false
private def isRecord(obj: ujson.Obj): Boolean = {
hasKeyValue(obj, "@type") {
_ == "https://www.ica.org/standards/RiC/ontology#Record"
}
}
/**
 * Tells whether the object's `hasMimeType` entry is a string for which
 * `Conversions.getMediaFileType` returns a defined result.
 */
private def isProcessableMimeType(obj: ujson.Obj): Boolean =
  hasKeyValue(obj, "hasMimeType")(mimeType => Conversions.getMediaFileType(mimeType).isDefined)
/**
 * Tells whether the object's `locator` entry is a string starting with
 * `externalBaseUrl`.
 */
private def isLocalRecord(obj: ujson.Obj, externalBaseUrl: String): Boolean =
  hasKeyValue(obj, "locator")(_.startsWith(externalBaseUrl))
/**
 * Applies a predicate to the string stored under `key`.
 *
 * @param obj      JSON object to inspect
 * @param key      name of the entry to look up
 * @param valueFun predicate evaluated on the entry's string value
 * @return the predicate's result; `false` if the key is missing or its value
 *         is not a JSON string
 */
private def hasKeyValue(obj: ujson.Obj, key: String)(valueFun: String => Boolean): Boolean =
  Try(obj.value(key)) match {
    case Success(Str(text)) => valueFun(text)
    case _ => false
  }
/**
 * Looks up the event type on the first graph entry that is a record and has
 * an `eventType` key.
 *
 * @param objList entries of the JSON-LD graph
 * @return the event mapped by `chooseEventType`, or `None` if no entry qualifies
 */
private def getEventType(objList: ArrayBuffer[Value]): Option[Event] =
  objList
    .find(entry => isRecord(entry.obj) && entry.obj.contains("eventType"))
    .map(entry => chooseEventType(entry.obj("eventType").str))
}
......@@ -38,3 +38,8 @@ case object Update extends Event
* Equals a `delete` event type produced by Fedora
*/
case object Delete extends Event
/**
 * Event whose type name is none of the known ones (`Create`, `Update`,
 * `Delete`).
 *
 * @param eventName the unrecognised event type name as it was received
 */
case class UnknownEvent(eventName: String) extends Event
......@@ -27,4 +27,11 @@ package ch.memobase.models
//noinspection ScalaFileName
/** Raised when a local digital object has a mime type that cannot be processed. */
class UnmanageableMediaFileType(msg: String) extends Exception(msg)
/**
 * Signals a resource without a binary object; `RecordProcessor` turns it into an ignored outcome.
 * NOTE(review): no throw site is visible in the refactored code - confirm it is still raised somewhere.
 */
class ResourceWithoutBinary(msg: String) extends Exception(msg)
/** Raised when the JSON-LD graph contains no entry of type `digitalObject`. */
class NoDigitalObject extends Exception("No digital object found")
/** Raised when the digital object's locator does not start with the expected base URL. */
class NoLocalBinary extends Exception("No reference to local binary found")
/** Raised when the event type found in the message is not one of the known ones. */
class UnknownEventType(msg: String) extends Exception(msg)
/** Raised when no record entry in the graph provides an event type. */
class NoEventType extends Exception("No event type found")
......@@ -59,6 +59,11 @@ case object OgaFile extends AudioFileType
*/
case object JpegFile extends ImageFileType
/**
* Represents a PNG file
*/
case object PngFile extends ImageFileType
/**
* Represents a MPEG4 video file
*/
......@@ -71,7 +76,8 @@ object Conversions {
private val fileTypeTuples: List[(MediaFileType, List[String], String)] = List(
(Mp3File, List("audio/mpeg"), "mp3"),
(OgaFile, List("audio/ogg"), "oga"),
(JpegFile, List("image/jpg"), "jpg"),
(JpegFile, List("image/jpeg"), "jpg"),
(PngFile, List("image/png"), "png"),
(VideoMpeg4File, List("video/mp4"), "mp4")
// TODO: Other filetypes...
)
......
......@@ -17,6 +17,8 @@
"lastModified": "2020-06-30T10:07:26.563Z",
"lastModifiedBy": "fedoraAdmin",
"contains": "https://memobase.ch/digital/BAZ-MEI_77466-1/binary",
"hasMimeType": "{{mimeType}}",
"locator": "{{locator}}",
"identifiedBy": [
"https://memobase.ch/digital/BAZ-MEI_77466-1#genidd1c6f2c6-99a9-407a-970a-1ec31a8e0292",
"https://memobase.ch/digital/BAZ-MEI_77466-1#genidada014e8-ead3-459f-a7ea-ac0aaf02b392"
......@@ -58,14 +60,8 @@
},
{
"@id": "https://memobase.ch/record/BAZ-MEI_77466",
"@type": [
"fedora:Resource",
"https://www.ica.org/standards/RiC/ontology#Record",
"ldp:BasicContainer",
"ldp:Container",
"fedora:Container",
"ldp:RDFSource"
],
"@type": "https://www.ica.org/standards/RiC/ontology#Record",
"eventType": "{{eventType}}",
"fedora:created": {
"@type": "http://www.w3.org/2001/XMLSchema#dateTime",
"@value": "2020-06-30T09:45:42.286Z"
......
......@@ -16,6 +16,8 @@
"createdBy": "fedoraAdmin",
"lastModified": "2020-06-30T10:07:26.563Z",
"lastModifiedBy": "fedoraAdmin",
"contains": "https://memobase.ch/digital/BAZ-MEI_77466-1/binary",
"hasMimeType": "{{mimeType}}",
"identifiedBy": [
"https://memobase.ch/digital/BAZ-MEI_77466-1#genidd1c6f2c6-99a9-407a-970a-1ec31a8e0292",
"https://memobase.ch/digital/BAZ-MEI_77466-1#genidada014e8-ead3-459f-a7ea-ac0aaf02b392"
......@@ -57,14 +59,8 @@
},
{
"@id": "https://memobase.ch/record/BAZ-MEI_77466",
"@type": [
"fedora:Resource",
"https://www.ica.org/standards/RiC/ontology#Record",
"ldp:BasicContainer",
"ldp:Container",
"fedora:Container",
"ldp:RDFSource"
],
"@type": "https://www.ica.org/standards/RiC/ontology#Record",
"eventType": "{{eventType}}",
"fedora:created": {
"@type": "http://www.w3.org/2001/XMLSchema#dateTime",
"@value": "2020-06-30T09:45:42.286Z"
......
......@@ -19,7 +19,7 @@
package ch.memobase
import ch.memobase.models.BinaryResourceMetadata
import ch.memobase.models.{BinaryResourceMetadata, NoLocalBinary, UnknownEventType, UnmanageableMediaFileType}
import org.scalatest.funsuite.AnyFunSuite
import scala.io.Source
......@@ -28,17 +28,40 @@ class BinaryResourceMetadataTest extends AnyFunSuite {
val externalBaseUrl = "https://memobase.ch"
private def loadMessage: String = {
private def loadMessageWithBinaryResource(eventType: String, mimeType: String, locator: String): String = {
val file = Source.fromFile("src/test/resources/incoming_message_with_binary.json")
val result = file.mkString
.replaceAll(raw"\{\{eventType\}\}", "Create")
.replaceAll(raw"\{\{mimeType\}\}", "image/jpeg")
.replaceAll(raw"\{\{eventType\}\}", eventType)
.replaceAll(raw"\{\{mimeType\}\}", mimeType)
.replaceAll(raw"\{\{locator\}\}", locator)
file.close()
result
}
test("the value of the id field of a KafkaMessage should match the id of the parsed object") {
val km = BinaryResourceMetadata.build(loadMessage, externalBaseUrl)
assert(km.get.id == "BAZ-MEI_77466-1")
val km = BinaryResourceMetadata.build(loadMessageWithBinaryResource("Create", "image/jpeg", "https://memobase.ch/digital/BAZ-MEI_77466-1/binary"), externalBaseUrl)
assert(km.isSuccess)
}
test("a reference to a non-local binary should throw a NoLocalBinary exception") {
assertThrows[NoLocalBinary] {
BinaryResourceMetadata.build(loadMessageWithBinaryResource("Create", "image/jpeg", "https://example.com"), externalBaseUrl).get
}
}
test("a unmanageable mime type should throw a UnmanageableMediaFileType exception") {
assertThrows[UnmanageableMediaFileType] {
BinaryResourceMetadata.build(loadMessageWithBinaryResource("Create",
"application/pdf",
"https://memobase.ch/digital/BAZ-MEI_77466-1/binary"), externalBaseUrl).get
}
}
test("a unknown event type should throw a UnknownEventType exception") {
assertThrows[UnknownEventType] {
BinaryResourceMetadata.build(loadMessageWithBinaryResource("Upload",
"image/jpeg",
"https://memobase.ch/digital/BAZ-MEI_77466-1/binary"), externalBaseUrl).get
}
}
}
......@@ -25,6 +25,7 @@ import java.nio.file.{Files, Path, Paths}
import ch.memobase.models.{JpegFile, MediaFileType, Mp3File, VideoMpeg4File}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.{Assertion, BeforeAndAfter}
import TestUtilities._
import scala.util.Try
......@@ -77,27 +78,33 @@ class DisseminationCopyHandlerTest extends AnyFunSuite with BeforeAndAfter {
* ATTENTION: Requires that ffmpeg is properly installed!
*/
test("calling the copyAudio function should create temporary file") {
val f = fixture
testCopy(f.resPath, "sample.mp3", "test.mp4", Mp3File, f.fileHandler.createAudioCopy)
deleteFiles("src/test/resources/test.mp4", "src/test/resources/test-intro.mp3")
runWithFFmpeg {
val f = fixture
testCopy(f.resPath, "sample.mp3", "test.mp4", Mp3File, f.fileHandler.createAudioCopy)
deleteFiles("src/test/resources/test.mp4", "src/test/resources/test-intro.mp3")
}
}
/**
* ATTENTION: Requires that Kakadu and imagemagick are properly installed!
*/
test("calling the copyImage function should create temporary file") {
val f = fixture
testCopy(f.resPath, "sample.jpg", "test.jp2", JpegFile, f.fileHandler.createImageCopy)
deleteFiles("src/test/resources/test.jp2")
runWithKakaduAndIM {
val f = fixture
testCopy(f.resPath, "sample.jpg", "test.jp2", JpegFile, f.fileHandler.createImageCopy)
deleteFiles("src/test/resources/test.jp2")
}
}
/**
* ATTENTION: Requires that ffmpeg is properly installed!
*/
test("calling the copyVideo function should create temporary file") {
val f = fixture
testCopy(f.resPath, "sample.mp4", "test.mp4", VideoMpeg4File, f.fileHandler.createVideoCopy)
deleteFiles("src/test/resources/test.mp4")
runWithFFmpeg {
val f = fixture
testCopy(f.resPath, "sample.mp4", "test.mp4", VideoMpeg4File, f.fileHandler.createVideoCopy)
deleteFiles("src/test/resources/test.mp4")
}
}
}
......@@ -22,46 +22,35 @@ package ch.memobase
import java.nio.file.{Files, Paths}
import org.scalatest.funsuite.AnyFunSuite
import TestUtilities._
class TransformationsTest extends AnyFunSuite {
private def appExists(name: String): Boolean = {
import sys.process._
(name !) == 0
}
class MediaTransformationsTest extends AnyFunSuite {
test("Sample mp3 should be transformed correctly") {
if (appExists("ffmpeg -version")) {
runWithFFmpeg {
val outFile = Files.createTempFile(Paths.get("src/test/resources"), "test-", ".mp4")
val res = Transformations.audioToMp4("src/test/resources/sample.mp3", outFile.toString)
val res = MediaTransformations.audioToMp4("src/test/resources/sample.mp3", outFile.toString)
outFile.toFile.delete()
assert(res.get == outFile.toString)
} else {
println("No ffmpeg binary found in $PATH")
}
}
test("Conversion of nonexistent mp3 should abort with error") {
if (appExists("ffmpeg -version")) {
val outFile = Files.createTempFile(Paths.get("src/test/resources"), "test-", ".mp4")
val res = Transformations.audioToMp4("src/test/resources/null.mp3", outFile.toString)
outFile.toFile.delete()
assert(res.isFailure)
} else {
println("No ffmpeg binary found in $PATH")
runWithFFmpeg {
val outFile = Files.createTempFile(Paths.get("src/test/resources"), "test-", ".mp4")
val res = MediaTransformations.audioToMp4("src/test/resources/null.mp3", outFile.toString)
outFile.toFile.delete()
assert(res.isFailure)
}
}
test("Sample jpeg should be transformed into jp2") {
if (appExists("kdu_compress -v") && appExists("convert -version")) {
runWithKakaduAndIM {
val outFile = Files.createTempFile(Paths.get("src/test/resources"), "test-", ".jp2")
val res = Transformations.imageToJp2("src/test/resources/sample.jpg", outFile.toString)
val res = MediaTransformations.imageToJp2("src/test/resources/sample.jpg", outFile.toString)
outFile.toFile.delete()
assert(res.isSuccess, res)
} else if (!appExists("kdu_compress -v"))
println("No kdu_compress binary found in $PATH")
else if (!appExists("convert -version"))
println("No convert binary found in $PATH")
}
}
}
......@@ -41,7 +41,7 @@ class RecordProcessorTest extends AnyFunSuite with MockFactory {
val mT = mimeType match {
case JpegFile => "image/jpeg"
case Mp3File => "audio/mp3"
case Mp3File => "audio/mpeg"
case VideoMpeg4File => "video/mpeg"
}
replaceTokensInIncomingMessage(eT, mT)
......@@ -54,11 +54,12 @@ class RecordProcessorTest extends AnyFunSuite with MockFactory {
}
}
private def replaceTokensInIncomingMessage(eventType: String, mimeType: String) = {
private def replaceTokensInIncomingMessage(eventType: String, mimeType: String, locator: String = "https://memobase.ch/digital/BAZ-MEI_77466-1/binary") = {
val is = Source.fromFile("src/test/resources/incoming_message_with_binary.json")
val incomingMessage = is.mkString
.replaceAll(raw"\{\{mimeType\}\}", mimeType)
.replaceAll(raw"\{\{eventType\}\}", eventType)
.replaceAll(raw"\{\{locator\}\}", locator)
val baos = copyIncomingMessage(incomingMessage)
is.close
(incomingMessage, baos)
......@@ -79,7 +80,7 @@ class RecordProcessorTest extends AnyFunSuite with MockFactory {
val mockFedoraClientWrapper: FedoraClientWrapper = mock[FedoraClientWrapper]
}
test("an object of mimeType image/jpg and eventType Create should trigger copyImage") {
test("an object of mimeType image/jpeg and eventType Create should trigger copyImage") {
val f = fixture
val mockDCH = f.mockDisseminationCopyHandler
val mockFCW = f.mockFedoraClientWrapper
......
/*
* Media Converter
* Extracts media files from Fedora repository
* Copyright (C) 2020 Memoriav
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package ch.memobase
import org.scalatest.Assertion
import scala.language.postfixOps
/**
 * Helpers that let tests depend on external command-line tools without
 * failing on machines where those tools are not installed.
 */
object TestUtilities {
  import scala.sys.process._
  import scala.util.Try

  /**
   * Checks whether a command can be run successfully.
   *
   * @param name full command line to execute, e.g. `ffmpeg -version`
   * @return `true` if the command could be started and exited with code 0,
   *         `false` if it exited with another code or could not be started
   */
  def appExists(name: String): Boolean =
    // Starting a binary that is not on the PATH throws an IOException rather
    // than yielding a non-zero exit code, so that case is mapped to `false`.
    Try(name.!).toOption.contains(0)

  /**
   * Runs `f` only if `ffmpeg` is available; otherwise prints a notice and
   * skips it.
   *
   * @param f code that requires ffmpeg
   */
  def runWithFFmpeg[T](f: => T): Unit =
    if (appExists("ffmpeg -version")) {
      f
      ()
    } else {
      println("No ffmpeg binary found in $PATH")
    }

  /**
   * Runs `f` only if both Kakadu (`kdu_compress`) and ImageMagick (`convert`)
   * are available; otherwise prints which of the two is missing and skips it.
   * Each tool is probed at most once per call.
   *
   * @param f code that requires Kakadu and ImageMagick
   */
  def runWithKakaduAndIM[T](f: => T): Unit =
    if (!appExists("kdu_compress -v")) {
      println("No kdu_compress binary found in $PATH")
    } else if (!appExists("convert -version")) {
      println("No convert binary found in $PATH")
    } else {
      f
      ()
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment