Unverified Commit 7b86d221 authored by Sebastian Schüpbach's avatar Sebastian Schüpbach
Browse files

fetch oembed content from youtube / vimeo

parent ba1ccb4f
Pipeline #17367 failed with stages
in 2 minutes and 24 seconds
......@@ -6,8 +6,6 @@ object Constant {
const val sftpBasePathPropertyName = "sftp.basePath"
const val extensionsPropertyName = "extensions"
const val vimeoThumbnailWidth = 1000
const val rdfParserLang = "NTRIPLES"
const val digitalObject = "digitalObject"
......
......@@ -33,6 +33,7 @@ import org.apache.logging.log4j.LogManager
import org.memobase.rdf.EBUCORE
import org.memobase.rdf.RDF
import org.memobase.rdf.RICO
import org.memobase.reports.OembedResponse
import org.memobase.reports.ReportMessages
import org.memobase.reports.ReportStatus
import org.memobase.settings.SettingsLoader
......@@ -45,7 +46,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("KafkaTopology")
private val sftpClient = SftpClient(settings.sftpSettings)
private val previewImageHandler = PreviewImageHandler(sftpClient)
private val previewImageHandler = RemoteResourceHandler(sftpClient)
private val sftpBasePath = appSettings.getProperty(Constant.sftpBasePathPropertyName)
private val fileExtensions = appSettings.getProperty(Constant.extensionsPropertyName).split(",")
private val reportingTopic = settings.processReportTopic
......@@ -167,16 +168,16 @@ class KafkaTopology(private val settings: SettingsLoader) {
log.warn("No valid locator url found for ${value.third.id}")
return updateRecord(value, ReportStatus.failure, thumbnailMessage = "no valid locator url")
}
PreviewImageHandler.isVimeoUrl(locator) -> {
RemoteResourceHandler.isVimeoUrl(locator) -> {
log.info("Trying to download thumbnail file on vimeo for ${value.third.id}")
this.previewImageHandler.getFromVimeo(locator, Constant.vimeoThumbnailWidth)
this.previewImageHandler.getFromVimeo(locator)
?: return updateRecord(
value,
ReportStatus.failure,
thumbnailMessage = "couldn't fetch vimeo thumbnail"
)
}
PreviewImageHandler.isYoutubeUrl(locator) -> {
RemoteResourceHandler.isYoutubeUrl(locator) -> {
log.info("Trying to download thumbnail file on youtube for ${value.third.id}")
this.previewImageHandler.getFromYoutube(locator)
?: return updateRecord(
......@@ -194,39 +195,69 @@ class KafkaTopology(private val settings: SettingsLoader) {
)
}
}.let {
val destPath = "$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}/${
recordResource.uri.split(
"/"
).last()
}.jpg"
val pathOnSftpServer = previewImageHandler.moveFileToSFTP(it, destPath)
if (pathOnSftpServer != null) {
log.info("Move downloaded thumbnail file to $destPath for ${value.third.id}")
createThumbnailResource(
value.first.first,
recordResource,
digitalObjectResource,
pathOnSftpServer
)
return updateRecord(
value,
value.third.status,
thumbnailMessage = "youtube / vimeo thumbnail fetched"
)
} else {
log.warn("Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}")
return updateRecord(
value,
ReportStatus.failure,
thumbnailMessage = "upload of youtube / vimeo thumbnail to sFTP server failed"
)
}
val enrichedValue = addDimensionsToDigitalObject(value, it.first)
return it.second?.let { locator ->
addLocalThumbnail(enrichedValue, recordResource, digitalObjectResource, locator)
} ?: enrichedValue
}
}
}
return value
}
/**
 * Moves a locally stored thumbnail to the sFTP server and, if that yields a remote path,
 * hands the path to [createThumbnailResource].
 *
 * The destination is `<sftpBasePath>/<recordSetId>/<thumbnail folder>/<last URI segment>.jpg`.
 *
 * @param value Record triple (model with header metadata, resources, report) being processed
 * @param recordResource Record resource; the last segment of its URI names the uploaded file
 * @param digitalObjectResource Digital object resource passed on to [createThumbnailResource]
 * @param pathToLocalFile Path of the thumbnail on the local file system
 *
 * @return Result of [updateRecord]: called with the current report status when the move
 * returned a path, or with [ReportStatus.failure] when it returned `null`
 */
private fun addLocalThumbnail(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    recordResource: Resource,
    digitalObjectResource: Resource,
    pathToLocalFile: String
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    val recordSetId = value.first.second.recordSetId
    val fileName = recordResource.uri.substringAfterLast("/")
    val destPath = "$sftpBasePath/${recordSetId}/${Constant.thumbnailFolderName}/${fileName}.jpg"

    val pathOnSftpServer = previewImageHandler.moveFileToSFTP(pathToLocalFile, destPath)
    if (pathOnSftpServer == null) {
        // No remote path came back: report the failure and leave the model untouched.
        log.warn("Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}")
        return updateRecord(
            value,
            ReportStatus.failure,
            thumbnailMessage = "upload of youtube / vimeo thumbnail to sFTP server failed"
        )
    }

    log.info("Move downloaded thumbnail file to $destPath for ${value.third.id}")
    val model = value.first.first
    createThumbnailResource(model, recordResource, digitalObjectResource, pathOnSftpServer)
    return updateRecord(
        value,
        value.third.status,
        thumbnailMessage = "youtube / vimeo thumbnail fetched"
    )
}
/**
 * Copies the width and height of an oEmbed response onto the record's digital object.
 *
 * Each dimension is added as a plain literal under [EBUCORE.width] / [EBUCORE.height] and is
 * skipped when the oEmbed response does not carry it.
 *
 * @param value Record triple whose resource list is searched for the digital object
 * @param oembedObject Parsed oEmbed response providing the dimensions
 *
 * @return The [value] that was passed in; the literals are added through the digital object
 * resource
 * @throws IllegalStateException if [getDigitalObjectResource] finds no digital object
 */
private fun addDimensionsToDigitalObject(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    oembedObject: OembedResponse
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    // Fail with a message that names the record instead of a bare `!!` null-pointer error.
    val digitalObjectResource = checkNotNull(getDigitalObjectResource(value.second)) {
        "No digital object resource found for ${value.third.id}"
    }
    oembedObject.width?.let { width ->
        digitalObjectResource.addLiteral(EBUCORE.width, ResourceFactory.createPlainLiteral(width.toString()))
    }
    oembedObject.height?.let { height ->
        digitalObjectResource.addLiteral(EBUCORE.height, ResourceFactory.createPlainLiteral(height.toString()))
    }
    // The former `createLiteral(digitalObjectResource.toString(), true)` call was dropped:
    // its result was discarded, so it contributed nothing to the model.
    return value
}
/**
 * Tells whether none of the given resources has a [RICO.type] property with the value
 * [Constant.thumbnailRicoType].
 *
 * @param resources Resources to inspect
 *
 * @return `true` if no resource matches (also for an empty list), `false` otherwise
 */
private fun noThumbnailAttached(resources: List<Resource>): Boolean =
    !resources.any { resource -> resource.hasProperty(RICO.type, Constant.thumbnailRicoType) }
......
package org.memobase
import java.io.ByteArrayOutputStream
import java.io.FileNotFoundException
import java.io.FileOutputStream
import java.io.IOException
import java.net.HttpURLConnection
import java.net.URL
import java.net.URLEncoder
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
import org.apache.logging.log4j.LogManager
import org.memobase.exceptions.SftpClientException
import org.memobase.reports.OembedResponse
import org.memobase.sftp.SftpClient
/**
* Fetches preview images for videos on Vimeo or Youtube
* Queries oembed API for videos on Vimeo or Youtube and downloads poster images
*/
class PreviewImageHandler(private val sftpClient: SftpClient) {
class RemoteResourceHandler(private val sftpClient: SftpClient) {
private val log = LogManager.getLogger("MediaLinker")
companion object {
......@@ -39,9 +43,48 @@ class PreviewImageHandler(private val sftpClient: SftpClient) {
/**
 * Checks whether a URL points to Youtube.
 *
 * Accepts the hosts `youtube.com` and `youtu.be` as well as their subdomains
 * (e.g. `www.youtube.com`, `m.youtube.com`). The comparison ignores case and does not
 * depend on the JVM default locale.
 *
 * @param url Absolute URL as string
 *
 * @return `true` if the host of [url] is a Youtube host
 * @throws java.net.MalformedURLException if [url] cannot be parsed as a URL
 */
fun isYoutubeUrl(url: String): Boolean {
    val host = URL(url).host
    return listOf("youtube.com", "youtu.be").any { domain ->
        // The leading dot keeps look-alike hosts such as "notyoutube.com" from matching.
        host.equals(domain, ignoreCase = true) || host.endsWith(".$domain", ignoreCase = true)
    }
}
/**
 * Requests an oEmbed document with HTTP GET and parses it.
 *
 * @param urlAsString Complete URL of the oEmbed endpoint, including the query string
 *
 * @return Parsed response, or `null` if the request fails with an [IOException]. This covers
 * [FileNotFoundException] (the only failure handled previously) as well as malformed URLs,
 * unreachable hosts and other failed reads.
 */
private fun getOembedObject(urlAsString: String): OembedResponse? {
    val body = try {
        val connection = URL(urlAsString).openConnection() as HttpURLConnection
        try {
            connection.requestMethod = "GET"
            // Decode explicitly as UTF-8 rather than with the JVM default charset; `use`
            // closes the response stream once it has been read.
            connection.inputStream.bufferedReader(StandardCharsets.UTF_8).use { it.readText() }
        } finally {
            connection.disconnect()
        }
    } catch (e: IOException) {
        // No usable response: callers treat `null` as "no oEmbed data available".
        return null
    }
    return OembedResponse.fromJson(body)
}
/**
 * Fetches the oEmbed description of a Youtube video.
 *
 * @param url URL of the video page; it is URL-encoded before being put into the query string
 *
 * @return Whatever [getOembedObject] returns for the Youtube oEmbed endpoint
 */
fun getYoutubeOembedObject(url: String): OembedResponse? {
    val encodedVideoUrl = URLEncoder.encode(url, StandardCharsets.UTF_8.toString())
    // Use https, like the Vimeo endpoint: the request is no longer sent in cleartext and does
    // not depend on an http -> https redirect being followed.
    return getOembedObject("https://www.youtube.com/oembed?url=${encodedVideoUrl}&format=json")
}
/**
 * Fetches the oEmbed description of a Vimeo video.
 *
 * @param url URL of the video page; it is URL-encoded before being put into the query string
 *
 * @return Whatever [getOembedObject] returns for the Vimeo oEmbed endpoint
 */
fun getVimeoOembedObject(url: String): OembedResponse? {
    val charsetName = StandardCharsets.UTF_8.toString()
    val encodedVideoUrl = URLEncoder.encode(url, charsetName)
    return getOembedObject("https://vimeo.com/api/oembed.json?url=${encodedVideoUrl}")
}
}
private fun get(urlAsString: String): String? {
private fun getThumbnail(urlAsString: String): String? {
val url = URL(urlAsString)
return try {
val tempFile = Files.createTempFile("", ".jpg")
......@@ -64,39 +107,34 @@ class PreviewImageHandler(private val sftpClient: SftpClient) {
}
/**
* Get preview image from Youtube
* Get embedding information and preview image from Youtube
*
* @param videoURL URL of video
*
* @return Path to local file
* @return Pair with Oembed object and path to local file
*/
fun getFromYoutube(videoURL: String): String? {
val url = URL(videoURL)
val id = if (url.host.endsWith("youtube.com")) {
URL(videoURL).query.split("&").firstOrNull {
it.startsWith("v=")
}?.substring(2)
} else {
url.path.substring(1)
}
return if (id != null) {
get("https://img.youtube.com/vi/$id/hqdefault.jpg")
} else {
null
fun getFromYoutube(videoURL: String): Pair<OembedResponse, String?>? {
    // Without an oEmbed response there is nothing to return at all.
    val oembed = getYoutubeOembedObject(videoURL) ?: return null
    // The thumbnail is only fetched when the response names one; otherwise the path stays null.
    val localThumbnailPath = oembed.thumbnail_url?.let { thumbnailUrl -> getThumbnail(thumbnailUrl) }
    return Pair(oembed, localThumbnailPath)
}
/**
* Get preview image from Vimeo
* Get embedding information and preview image from Vimeo
*
* @param videoURL URL of video
* @param width Width of preview image
*
* @return Path to local file
* @return Pair with Oembed object and path to local file
*/
fun getFromVimeo(videoURL: String, width: Int): String? {
val id = URL(videoURL).path.split("/").last()
return get("https://i.vimeocdn.com/vimeo/${id}_$width.jpg")
fun getFromVimeo(videoURL: String): Pair<OembedResponse, String?>? {
    // Without an oEmbed response there is nothing to return at all.
    val oembed = getVimeoOembedObject(videoURL) ?: return null
    // The thumbnail is only fetched when the response names one; otherwise the path stays null.
    val localThumbnailPath = oembed.thumbnail_url?.let { thumbnailUrl -> getThumbnail(thumbnailUrl) }
    return Pair(oembed, localThumbnailPath)
}
/**
......
package org.memobase.reports
import com.beust.klaxon.Klaxon
/**
 * Response document of an oEmbed endpoint.
 *
 * Property names match the JSON keys of the response. Only [type] and [version] are
 * mandatory; every other property defaults to `null`, so the class can be built from a
 * response that omits it (previously only [url] had such a default).
 * NOTE(review): Klaxon presumably cannot instantiate the class when the JSON lacks a
 * constructor parameter that has no default value — confirm against the Klaxon version in use.
 */
data class OembedResponse(
    val type: String,
    val version: String,
    val title: String? = null,
    val author_name: String? = null,
    val author_url: String? = null,
    val provider_name: String? = null,
    val provider_url: String? = null,
    val thumbnail_url: String? = null,
    val thumbnail_width: Int? = null,
    val thumbnail_height: Int? = null,
    val url: String? = null,
    val width: Int? = null,
    val height: Int? = null,
    val html: String? = null
) {
    companion object {
        /**
         * Parses the JSON body of an oEmbed response.
         *
         * @param msg JSON document as string
         *
         * @return The result of Klaxon's `parse` call, which is nullable
         */
        fun fromJson(msg: String): OembedResponse? {
            return Klaxon().parse<OembedResponse>(msg)
        }
    }
}
import kotlin.test.assertNotNull
import kotlin.test.assertNull
import org.junit.jupiter.api.Test
import org.memobase.RemoteResourceHandler
/**
 * Checks the oEmbed lookups offered by [RemoteResourceHandler].
 *
 * NOTE(review): the lookups appear to query the live Youtube / Vimeo endpoints, so these
 * tests presumably need network access — confirm.
 */
internal class TestRemoteResourceHandler {

    /** A Youtube video URL is expected to yield an oEmbed object. */
    @Test
    fun getYoutubeOembedObject() {
        val videoUrl = "https://www.youtube.com/watch?v=5ujk7IamcPI"
        assertNotNull(RemoteResourceHandler.getYoutubeOembedObject(videoUrl))
    }

    /** A Vimeo video URL is expected to yield an oEmbed object. */
    @Test
    fun getVimeoOembedObject() {
        val videoUrl = "https://vimeo.com/223023510"
        assertNotNull(RemoteResourceHandler.getVimeoOembedObject(videoUrl))
    }

    /** A video URL with a misspelled host ("vieo.com") is expected to yield no oEmbed object. */
    @Test
    fun getOembedObjectWithInvalidUrl() {
        val misspelledUrl = "https://vieo.com/223023510"
        assertNull(RemoteResourceHandler.getVimeoOembedObject(misspelledUrl))
    }
}
......@@ -96,7 +96,7 @@ _:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb <https://www.ica.org/standards/R
_:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Identifier> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation> <https://memobase.ch/physical/Tanzarchiv-42858-43349-1> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#instantiates> <https://memobase.ch/record/Tanzarchiv-42858-43349> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/199629565" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/223023510" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#type> "digitalObject" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
\ No newline at end of file
......@@ -3,7 +3,9 @@
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#instantiates> <https://memobase.ch/record/Tanzarchiv-42858-43349> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation> <https://memobase.ch/digital/Tanzarchiv-42858-43349-1> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#type> "thumbnail" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/199629565" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#height> "360" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/223023510" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#width> "640" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#hasDerivedInstantiation> <https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B .
......
......@@ -44,6 +44,10 @@ mbr:Tanzarchiv-42858-43349
rico:type "main"
] ;
rico:heldBy mbcb:Tanzarchiv ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "43349" ;
rico:type "original"
] ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "Tanzarchiv-42858-43349" ;
rico:type "main"
......@@ -52,10 +56,6 @@ mbr:Tanzarchiv-42858-43349
rico:identifier "Tanzarchiv-43349" ;
rico:type "oldMemobase"
] ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "43349" ;
rico:type "original"
] ;
rico:isPartOf mbrs:Tanzarchiv-42858 ;
rico:recordResourceOrInstantiationIsSourceOfCreationRelation
_:b0 , _:b1 , _:b2 ;
......@@ -70,7 +70,9 @@ mbr:Tanzarchiv-42858-43349
mbdo:Tanzarchiv-42858-43349-1
a rico:Instantiation ;
ebucore:locator "https://vimeo.com/199629565" ;
ebucore:height "360" ;
ebucore:locator "https://vimeo.com/223023510" ;
ebucore:width "640" ;
rico:hasDerivedInstantiation <https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "Tanzarchiv-42858-43349-1" ;
......@@ -81,15 +83,15 @@ mbdo:Tanzarchiv-42858-43349-1
mbpo:Tanzarchiv-42858-43349-1 ;
rico:type "digitalObject" .
_:b0 a rico:CreationRelation ;
_:b2 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b0 ;
rico:name "Christophe Calpini"
_:b2 ;
rico:name "Katarzyna Gdaniec (Choreograf / Chorégraphe)"
] ;
rico:name "Komponisten / Compositeur" ;
rico:type "contributor" .
rico:name "Author" ;
rico:type "creator" .
mbpo:Tanzarchiv-42858-43349-1
a rico:Instantiation ;
......@@ -123,11 +125,11 @@ mbpo:Tanzarchiv-42858-43349-1
mbdo:Tanzarchiv-42858-43349-1 ;
rico:type "thumbnail" .
_:b2 a rico:CreationRelation ;
_:b0 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:CorporateBody ;
rico:agentIsTargetOfCreationRelation
_:b2 ;
_:b0 ;
rico:name "Compagnie Linga"
] ;
rico:name "Kompanie / Compagnie" ;
......@@ -138,7 +140,7 @@ _:b1 a rico:CreationRelation ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b1 ;
rico:name "Katarzyna Gdaniec (Choreograf / Chorégraphe)"
rico:name "Christophe Calpini"
] ;
rico:name "Author" ;
rico:type "creator" .
rico:name "Komponisten / Compositeur" ;
rico:type "contributor" .
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment