Unverified Commit a098d053 authored by Sebastian Schüpbach's avatar Sebastian Schüpbach
Browse files

add youtube / vimeo thumbnail fetching

parent 01fb9b18
Pipeline #16006 passed with stages
in 4 minutes and 47 seconds
......@@ -2,10 +2,12 @@ package org.memobase
object Constant {
const val mediaFolderName = "media"
const val thumbnailFolderName ="thumbnails"
const val thumbnailFolderName = "thumbnails"
const val sftpBasePathPropertyName = "sftp.basePath"
const val extensionsPropertyName = "extensions"
const val vimeoThumbnailWidth = 1000
const val rdfParserLang = "NTRIPLES"
const val digitalObject = "digitalObject"
......@@ -13,5 +15,4 @@ object Constant {
const val thumbnailRicoType = "thumbnail"
const val sftpPathPrefix = "sftp:"
}
\ No newline at end of file
}
......@@ -18,6 +18,8 @@
package org.memobase
import java.io.StringReader
import java.io.StringWriter
import org.apache.jena.rdf.model.Model
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.rdf.model.Resource
......@@ -33,13 +35,12 @@ import org.memobase.settings.SettingsLoader
import org.memobase.sftp.SftpClient
import settings.HeaderExtractionTransformSupplier
import settings.HeaderMetadata
import java.io.StringReader
import java.io.StringWriter
class KafkaTopology(private val settings: SettingsLoader) {
private val appSettings = settings.appSettings
private val sftpClient = SftpClient(settings.sftpSettings)
private val previewImageHandler = PreviewImageHandler(sftpClient)
private val sftpBasePath = appSettings.getProperty(Constant.sftpBasePathPropertyName)
private val fileExtensions = appSettings.getProperty(Constant.extensionsPropertyName).split(",")
private val reportingTopic = settings.processReportTopic
......@@ -68,7 +69,6 @@ class KafkaTopology(private val settings: SettingsLoader) {
val updateDigitalObjects = instantiationBranch[0]
.mapValues { readOnlyKey, value -> enrichSftpLocator(readOnlyKey, value, Constant.mediaFolderName) }
updateDigitalObjects
.filterNot { _, value -> value.third.status == ReportStatus.failure } // failed records are deleted.
.mapValues { value ->
......@@ -85,6 +85,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
instantiationBranch[1]
.filterNot { _, value -> value.third.status == ReportStatus.failure } // failed records are deleted.
.mapValues { value -> fetchThumbnailForYoutubeOrVimeoFile(value) }
.mapValues { value ->
val out = StringWriter()
value.first.first.write(out, Constant.rdfParserLang)
......@@ -99,6 +100,58 @@ class KafkaTopology(private val settings: SettingsLoader) {
return builder
}
/**
 * Fetches a preview image for records whose digital object is hosted on Youtube or Vimeo
 * and which do not have a thumbnail attached yet.
 *
 * On success the image is moved to the sFTP server, a thumbnail resource is added to the
 * model and the report message is amended. If the upload fails the report is marked as
 * failed. In every other case (thumbnail already present, no record / digital object /
 * locator, locator is neither Youtube nor Vimeo, download failed) the report is amended
 * with a note that nothing has been fetched.
 *
 * @param value Model with header metadata, the subjects of the model and the current report
 *
 * @return The input with an amended report (the model is enriched in place)
 */
private fun fetchThumbnailForYoutubeOrVimeoFile(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    val nothingFetched = "; no additional youtube / vimeo thumbnails fetched"
    if (!noThumbnailAttached(value.second)) {
        return withAmendedReport(value, ReportStatus.success, nothingFetched)
    }
    val record = value.second.firstOrNull { it.hasProperty(RDF.type, RICO.Record) }
        ?: return withAmendedReport(value, ReportStatus.success, nothingFetched)
    // Digital objects are marked via rico:type (as in enrichSftpLocator), not via rdf:type.
    val digitalObject = value.second.firstOrNull { it.hasProperty(RICO.type, Constant.digitalObject) }
        ?: return withAmendedReport(value, ReportStatus.success, nothingFetched)
    // getProperty returns null if the digital object has no locator at all.
    val locator = digitalObject.getProperty(EBUCORE.locator)?.string
        ?: return withAmendedReport(value, ReportStatus.success, nothingFetched)

    val localFile = try {
        when {
            PreviewImageHandler.isVimeoUrl(locator) ->
                previewImageHandler.getFromVimeo(locator, Constant.vimeoThumbnailWidth)
            PreviewImageHandler.isYoutubeUrl(locator) ->
                previewImageHandler.getFromYoutube(locator)
            else -> null
        }
    } catch (ex: java.net.MalformedURLException) {
        // Locators which are no parseable URLs (e.g. sftp paths) can't point to Youtube / Vimeo.
        null
    } ?: return withAmendedReport(value, ReportStatus.success, nothingFetched)

    // Name the file after the digital object; interpolating `value` itself would put the
    // string representation of the whole triple into the path.
    val fileName = digitalObject.uri.substringAfterLast("/")
    val pathOnSftpServer = previewImageHandler.moveFileToSFTP(
        localFile,
        "$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}/${fileName}.jpg"
    )
    return if (pathOnSftpServer != null) {
        createThumbnailResource(value.first.first, record, digitalObject, pathOnSftpServer)
        withAmendedReport(value, value.third.status, "; youtube / vimeo thumbnail fetched")
    } else {
        withAmendedReport(
            value,
            ReportStatus.failure,
            "; youtube / vimeo thumbnail couldn't be uploaded to Sftp server"
        )
    }
}

/** Returns a copy of [value] whose report carries [status] and the old message extended by [suffix]. */
private fun withAmendedReport(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    status: String,
    suffix: String
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    return value.copy(third = Report(value.third.id, status, value.third.message + suffix))
}
/**
 * Tells whether the given resources lack a thumbnail.
 *
 * @param resources Resources to inspect
 *
 * @return true if no resource has a rico:type of [Constant.thumbnailRicoType]
 */
private fun noThumbnailAttached(resources: List<Resource>): Boolean =
    !resources.any { candidate -> candidate.hasProperty(RICO.type, Constant.thumbnailRicoType) }
/**
 * Pairs the incoming model / header metadata with the list of all subjects found in the model.
 *
 * @param input Model and its header metadata
 *
 * @return The unchanged input together with the subjects of its model
 */
private fun extractSubjects(input: Pair<Model, HeaderMetadata>): Pair<Pair<Model, HeaderMetadata>, List<Resource>> {
    val subjects = input.first.listSubjects().toList()
    return Pair(input, subjects)
}
......@@ -123,12 +176,31 @@ class KafkaTopology(private val settings: SettingsLoader) {
return Pair(model, data.second)
}
/**
 * Adds a thumbnail instantiation to the model and links it with the record and the
 * digital object it is derived from.
 *
 * @param data Model in which the thumbnail resource is created
 * @param record Record the thumbnail instantiates
 * @param digitalObject Digital object the thumbnail is derived from
 * @param locator Value stored as locator of the thumbnail
 */
private fun createThumbnailResource(
    data: Model,
    record: Resource,
    digitalObject: Resource,
    locator: String
) {
    // The thumbnail URI reuses the last path segment of the digital object's URI.
    val localId = digitalObject.uri.substringAfterLast("/")
    val derived = data.createResource("https://memobase.ch/digital/" + localId + "/derived")
    val locatorLiteral = ResourceFactory.createPlainLiteral(locator)

    // Description of the thumbnail itself.
    derived
        .addProperty(RDF.type, RICO.Instantiation)
        .addProperty(RICO.type, Constant.thumbnailRicoType)
        .addProperty(EBUCORE.locator, locatorLiteral)

    // Links between digital object and thumbnail (both directions).
    digitalObject.addProperty(RICO.hasDerivedInstantiation, derived)
    derived.addProperty(RICO.isDerivedFromInstantiation, digitalObject)

    // Links between record and thumbnail (both directions).
    record.addProperty(RICO.hasInstantiation, derived)
    derived.addProperty(RICO.instantiates, record)
}
private fun enrichSftpLocator(
key: String,
data: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
type: String
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
var link = ""
var link: String
return data.second.firstOrNull { it.hasProperty(RDF.type, RICO.Record) }.let { record ->
if (record != null) {
data.second.firstOrNull { it.hasProperty(RICO.type, Constant.digitalObject) }.let { digitalObject ->
......@@ -153,17 +225,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
digitalObject.addLiteral(EBUCORE.locator, literal)
data.first.first.createLiteral(digitalObject.toString(), true)
} else if (type == Constant.thumbnailFolderName) {
val thumbnail = data.first.first.createResource(
"https://memobase.ch/digital/${digitalObject.uri.substringAfterLast("/")}/derived"
)
val literal = ResourceFactory.createPlainLiteral(link)
thumbnail.addProperty(RDF.type, RICO.Instantiation)
thumbnail.addProperty(RICO.type, Constant.thumbnailRicoType)
thumbnail.addProperty(EBUCORE.locator, literal)
digitalObject.addProperty(RICO.hasDerivedInstantiation, thumbnail)
thumbnail.addProperty(RICO.isDerivedFromInstantiation, digitalObject)
record.addProperty(RICO.hasInstantiation, thumbnail)
thumbnail.addProperty(RICO.instantiates, record)
createThumbnailResource(data.first.first, record, digitalObject, link)
}
return Triple(
data.first,
......
package org.memobase
import java.io.FileNotFoundException
import java.io.FileOutputStream
import java.io.IOException
import java.net.HttpURLConnection
import java.net.URL
import java.nio.file.Files
import java.nio.file.Paths
import org.apache.logging.log4j.LogManager
import org.memobase.sftp.SftpClient
/**
 * Fetches preview images for videos on Vimeo or Youtube and moves them to the sFTP server
 */
class PreviewImageHandler(private val sftpClient: SftpClient) {
    private val log = LogManager.getLogger("MediaLinker")

    /**
     * Downloads an image into a newly created temporary `.jpg` file
     *
     * @param urlAsString URL of the image
     *
     * @return Path to local file or null if the download failed
     */
    private fun get(urlAsString: String): String? {
        val tempFile = try {
            Files.createTempFile("", ".jpg")
        } catch (ex: IOException) {
            log.error("Can't create temporary file: ${ex.message}")
            return null
        }
        // A single IOException handler is sufficient: FileNotFoundException and
        // MalformedURLException are both subclasses of IOException.
        return try {
            val connection = URL(urlAsString).openConnection() as HttpURLConnection
            try {
                connection.requestMethod = "GET"
                connection.inputStream.use { input ->
                    FileOutputStream(tempFile.toFile()).use { fileOut -> input.copyTo(fileOut) }
                }
            } finally {
                connection.disconnect()
            }
            tempFile.toString()
        } catch (ex: IOException) {
            log.error("Downloading of thumbnail file failed: ${ex.message}")
            // Don't leave an empty / partial temp file behind.
            deleteLocalFile(tempFile.toString())
            null
        }
    }

    /**
     * Deletes a local file, logging (instead of propagating) any failure
     *
     * @param path Path to local file
     */
    private fun deleteLocalFile(path: String) {
        try {
            Files.deleteIfExists(Paths.get(path))
        } catch (ex: IOException) {
            log.warn("Deleting local file $path failed: ${ex.message}")
        }
    }

    /**
     * Get preview image from Youtube
     *
     * Supports `youtube.com/watch?v=<id>` as well as `youtu.be/<id>` URLs.
     *
     * @param videoURL URL of video
     *
     * @return Path to local file or null if no video id could be extracted or the download failed
     */
    fun getFromYoutube(videoURL: String): String? {
        val url = parseUrl(videoURL) ?: return null
        val id = if (bareHost(url).equals("youtu.be", ignoreCase = true)) {
            // Short links carry the id in the path and have no query at all.
            url.path.orEmpty().trim('/').substringBefore('/')
        } else {
            // URL.query is null if the URL has no query part.
            url.query?.split("&")?.firstOrNull { it.startsWith("v=") }?.substring(2)
        }
        return id?.takeIf { it.isNotEmpty() }?.let {
            get("https://img.youtube.com/vi/$it/hqdefault.jpg")
        }
    }

    /**
     * Get preview image from Vimeo
     *
     * NOTE(review): assumes the image CDN can be addressed with the last path segment of the
     * video URL - confirm against the Vimeo documentation.
     *
     * @param videoURL URL of video
     * @param width Width of preview image
     *
     * @return Path to local file or null if no id could be extracted or the download failed
     */
    fun getFromVimeo(videoURL: String, width: Int): String? {
        // Ignore trailing slashes so that ".../12345/" still yields "12345".
        val id = parseUrl(videoURL)?.path.orEmpty().trimEnd('/').substringAfterLast('/')
        return if (id.isEmpty()) null else get("https://i.vimeocdn.com/vimeo/${id}_$width.jpg")
    }

    /**
     * Put file on sFTP server and delete local copy
     *
     * The local copy is removed whether or not the upload succeeded.
     *
     * @param sourcePath Path to local temp file
     * @param destPath Path to file about to be created on sFTP server
     *
     * @return destPath if the upload succeeded, null otherwise
     */
    fun moveFileToSFTP(sourcePath: String, destPath: String): String? {
        return try {
            sftpClient.put(sourcePath, destPath)
            destPath
        } catch (ex: IOException) {
            log.warn("Moving thumbnail file to sFTP server failed: ${ex.message}")
            null
        } finally {
            deleteLocalFile(sourcePath)
        }
    }

    companion object {
        /**
         * Parses a URL without throwing
         *
         * @param url String to be parsed
         *
         * @return Parsed URL or null if the string is malformed or uses an unknown protocol
         */
        private fun parseUrl(url: String): URL? {
            return try {
                URL(url)
            } catch (ex: IOException) {
                // MalformedURLException is a subclass of IOException.
                null
            }
        }

        /**
         * Returns the host of a URL without a leading "www."
         *
         * @param url Parsed URL
         *
         * @return Host name, possibly empty
         */
        private fun bareHost(url: URL): String {
            val host = url.host.orEmpty()
            return if (host.startsWith("www.", ignoreCase = true)) host.substring(4) else host
        }

        /**
         * Checks if URL points to Vimeo
         *
         * @param url URL to be scrutinised
         *
         * @return true if URL points to Vimeo, false otherwise (including unparseable URLs)
         */
        fun isVimeoUrl(url: String): Boolean {
            val parsed = parseUrl(url) ?: return false
            return bareHost(parsed).equals("vimeo.com", ignoreCase = true)
        }

        /**
         * Checks if URL points to Youtube
         *
         * @param url URL to be scrutinised
         *
         * @return true if URL points to Youtube, false otherwise (including unparseable URLs)
         */
        fun isYoutubeUrl(url: String): Boolean {
            val parsed = parseUrl(url) ?: return false
            val host = bareHost(parsed)
            return listOf("youtube.com", "youtu.be").any { it.equals(host, ignoreCase = true) }
        }
    }
}
......@@ -53,5 +53,4 @@ data class Report(
result = 31 * result + step.hashCode()
return result
}
}
......@@ -17,10 +17,10 @@
*/
package org.memobase
import kotlin.system.exitProcess
import org.apache.kafka.streams.KafkaStreams
import org.apache.logging.log4j.LogManager
import org.memobase.settings.SettingsLoader
import kotlin.system.exitProcess
class Service(file: String = "app.yml") {
private val log = LogManager.getLogger("MediaLinkerService")
......
......@@ -17,4 +17,4 @@ object ReportMessages {
/**
 * Builds the report message for a record which lacks an identifier of rico:type 'original'.
 *
 * @param key Key of the affected record
 *
 * @return The message text
 */
fun noOriginalIdentifier(key: String): String =
    "The record " + key + " does not contain a identifier with rico:type 'original'!"
}
\ No newline at end of file
}
......@@ -3,4 +3,4 @@ package org.memobase.reports
/**
 * String constants used as status value of a report.
 */
object ReportStatus {
// Status of a report for a record that is passed on.
const val success = "SUCCESS"
// Status of a report for a record that failed; the topology filters such records out.
const val failure = "FAILURE"
}
\ No newline at end of file
}
......@@ -18,6 +18,12 @@
package org.memobase
import com.beust.klaxon.Klaxon
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.charset.Charset
import java.nio.file.Paths
import java.util.stream.Stream
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.riot.Lang
import org.apache.jena.riot.RDFDataMgr
......@@ -28,7 +34,6 @@ import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.kafka.streams.TopologyTestDriver
import org.apache.kafka.streams.test.ConsumerRecordFactory
import org.apache.logging.log4j.LogManager
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.api.assertAll
......@@ -36,12 +41,6 @@ import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import org.memobase.rdf.NS
import org.memobase.testing.EmbeddedSftpServer
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.charset.Charset
import java.nio.file.Paths
import java.util.stream.Stream
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class TestKafkaTopology {
......@@ -143,7 +142,6 @@ class TestKafkaTopology {
val data = reportedRecord.value()
val report = Klaxon().parse<Report>(data)
assertAll(
"",
{
......@@ -176,5 +174,4 @@ class TestKafkaTopology {
""
)
)
}
\ No newline at end of file
}
@prefix schema: <http://schema.org/> .
@prefix internal: <http://memobase.ch/internal/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix mbrs: <https://memobase.ch/recordSet/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix wdt: <http://www.wikidata.org/prop/direct/> .
@prefix mbcb: <https://memobase.ch/institution/> .
@prefix mbpo: <https://memobase.ch/physical/> .
@prefix mbcb: <https://memobase.ch/institution/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix wdtn: <http://www.wikidata.org/prop/direct-normalized/> .
@prefix rdau: <http://rdaregistry.info/Elements/u/> .
@prefix mbdo: <https://memobase.ch/digital/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdau: <http://rdaregistry.info/Elements/u/> .
@prefix fedora: <http://fedora.info/definitions/v4/repository#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rico: <https://www.ica.org/standards/RiC/ontology#> .
@prefix ebucore: <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix ldp: <http://www.w3.org/ns/ldp#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix mbr: <https://memobase.ch/record/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment