Commit c7e2dd06 authored by Sebastian Schüpbach
Browse files

Merge branch 'oembed' into 'master'

embed oembed content from youtube / vimeo

See merge request !2
parents 01e79803 bec0354f
Pipeline #17498 passed with stages
in 3 minutes and 53 seconds
.gradle/
.idea/
.gitignore
.gitlab-ci.yml
helm-charts
build
out
\ No newline at end of file
......@@ -5,79 +5,10 @@ stages:
- test
- publish
test:
stage: test
image: gradle:6.3-jdk8
tags:
- mbr
script:
- gradle --no-daemon --no-scan --no-build-cache test --fail-fast
.build-image:
stage: publish
image: docker:stable
services:
- docker:dind
script:
- docker login -u "$REGISTRY_USER" -p "$REGISTRY_PASSWORD" "$REGISTRY"
- docker build --pull -t "$IMAGE_TAG" -f "$DOCKERFILE" .
- docker push "$IMAGE_TAG"
- docker logout
build-tagged-image:
extends: .build-image
variables:
IMAGE_TAG: "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG"
REGISTRY_PASSWORD: "$CI_REGISTRY_PASSWORD"
REGISTRY_USER: "$CI_REGISTRY_USER"
REGISTRY: "$CI_REGISTRY"
DOCKERFILE: "Dockerfile"
only:
- tags
build-latest-image:
extends: .build-image
variables:
IMAGE_TAG: "$CI_REGISTRY_IMAGE:latest"
REGISTRY_PASSWORD: "$CI_REGISTRY_PASSWORD"
REGISTRY_USER: "$CI_REGISTRY_USER"
REGISTRY: "$CI_REGISTRY"
DOCKERFILE: "Dockerfile"
only:
- master
build-feature-branch-image:
extends: .build-image
variables:
IMAGE_TAG: "$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME"
REGISTRY_PASSWORD: "$CI_REGISTRY_PASSWORD"
REGISTRY_USER: "$CI_REGISTRY_USER"
REGISTRY: "$CI_REGISTRY"
DOCKERFILE: "Dockerfile"
except:
- master
- tags
test-chart:
stage: test
image: dtzar/helm-kubectl:3.2.0
tags:
- mbr
script:
- helm lint helm-charts/
publish-chart:
stage: publish
image: dtzar/helm-kubectl:3.2.0
tags:
- mbr
script:
- export HELM_EXPERIMENTAL_OCI=1
- helm registry login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" "$CI_REGISTRY"
- ./insert_chart_versions.sh
- helm chart save helm-charts/ "$CI_REGISTRY/$CI_PROJECT_PATH:$CI_COMMIT_TAG-chart"
- helm chart push "$CI_REGISTRY/$CI_PROJECT_PATH:$CI_COMMIT_TAG-chart"
- helm registry logout "$CI_REGISTRY"
only:
- tags
\ No newline at end of file
include:
- project: 'memoriav/memobase-2020/utilities/ci-templates'
file: 'gradle-test/gradle-test.yml'
- project: 'memoriav/memobase-2020/utilities/ci-templates'
file: 'docker-image/docker-image.yml'
- project: 'memoriav/memobase-2020/utilities/ci-templates'
file: 'helm-chart/helm-chart.yml'
......@@ -2,12 +2,11 @@ plugins {
id 'application'
id 'distribution'
id 'org.jetbrains.kotlin.jvm' version '1.3.71'
id 'com.palantir.git-version' version '0.11.0'
id 'com.gitlab.morality.grit' version '2.0.2'
id 'org.jlleitschuh.gradle.ktlint' version '9.2.1'
}
group 'org.memobase'
version = gitVersion()
mainClassName = 'org.memobase.App'
jar {
......
......@@ -6,8 +6,6 @@ object Constant {
const val sftpBasePathPropertyName = "sftp.basePath"
const val extensionsPropertyName = "extensions"
const val vimeoThumbnailWidth = 1000
const val rdfParserLang = "NTRIPLES"
const val digitalObject = "digitalObject"
......
......@@ -33,6 +33,7 @@ import org.apache.logging.log4j.LogManager
import org.memobase.rdf.EBUCORE
import org.memobase.rdf.RDF
import org.memobase.rdf.RICO
import org.memobase.reports.OembedResponse
import org.memobase.reports.ReportMessages
import org.memobase.reports.ReportStatus
import org.memobase.settings.SettingsLoader
......@@ -45,7 +46,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
private val log = LogManager.getLogger("KafkaTopology")
private val sftpClient = SftpClient(settings.sftpSettings)
private val previewImageHandler = PreviewImageHandler(sftpClient)
private val previewImageHandler = RemoteResourceHandler(sftpClient)
private val sftpBasePath = appSettings.getProperty(Constant.sftpBasePathPropertyName)
private val fileExtensions = appSettings.getProperty(Constant.extensionsPropertyName).split(",")
private val reportingTopic = settings.processReportTopic
......@@ -167,16 +168,16 @@ class KafkaTopology(private val settings: SettingsLoader) {
log.warn("No valid locator url found for ${value.third.id}")
return updateRecord(value, ReportStatus.failure, thumbnailMessage = "no valid locator url")
}
PreviewImageHandler.isVimeoUrl(locator) -> {
RemoteResourceHandler.isVimeoUrl(locator) -> {
log.info("Trying to download thumbnail file on vimeo for ${value.third.id}")
this.previewImageHandler.getFromVimeo(locator, Constant.vimeoThumbnailWidth)
this.previewImageHandler.getFromVimeo(locator)
?: return updateRecord(
value,
ReportStatus.failure,
thumbnailMessage = "couldn't fetch vimeo thumbnail"
)
}
PreviewImageHandler.isYoutubeUrl(locator) -> {
RemoteResourceHandler.isYoutubeUrl(locator) -> {
log.info("Trying to download thumbnail file on youtube for ${value.third.id}")
this.previewImageHandler.getFromYoutube(locator)
?: return updateRecord(
......@@ -194,39 +195,69 @@ class KafkaTopology(private val settings: SettingsLoader) {
)
}
}.let {
val destPath = "$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}/${
recordResource.uri.split(
"/"
).last()
}.jpg"
val pathOnSftpServer = previewImageHandler.moveFileToSFTP(it, destPath)
if (pathOnSftpServer != null) {
log.info("Move downloaded thumbnail file to $destPath for ${value.third.id}")
createThumbnailResource(
value.first.first,
recordResource,
digitalObjectResource,
pathOnSftpServer
)
return updateRecord(
value,
value.third.status,
thumbnailMessage = "youtube / vimeo thumbnail fetched"
)
} else {
log.warn("Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}")
return updateRecord(
value,
ReportStatus.failure,
thumbnailMessage = "upload of youtube / vimeo thumbnail to sFTP server failed"
)
}
val enrichedValue = addDimensionsToDigitalObject(value, it.first)
return it.second?.let { locator ->
addLocalThumbnail(enrichedValue, recordResource, digitalObjectResource, locator)
} ?: enrichedValue
}
}
}
return value
}
/**
 * Uploads a locally stored thumbnail to the sFTP server and records the outcome.
 *
 * The destination is built from the sFTP base path, the record set id, the thumbnail folder
 * name and the last path segment of the record resource's URI, with a `.jpg` suffix.
 *
 * @param value message triple (model with header metadata, resources, report) being processed
 * @param recordResource record resource whose URI supplies the file name
 * @param digitalObjectResource digital object resource passed on to the thumbnail resource creation
 * @param pathToLocalFile path of the downloaded thumbnail on the local file system
 * @return result of [updateRecord]: with the report's current status when the upload succeeded,
 *   with a failure status otherwise
 */
private fun addLocalThumbnail(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    recordResource: Resource,
    digitalObjectResource: Resource,
    pathToLocalFile: String
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    val fileName = recordResource.uri.substringAfterLast("/")
    val targetFolder = "$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}"
    val destPath = "$targetFolder/$fileName.jpg"

    val pathOnSftpServer = previewImageHandler.moveFileToSFTP(pathToLocalFile, destPath)
    if (pathOnSftpServer == null) {
        // Upload failed: report it and leave the model untouched.
        log.warn("Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}")
        return updateRecord(
            value,
            ReportStatus.failure,
            thumbnailMessage = "upload of youtube / vimeo thumbnail to sFTP server failed"
        )
    }

    log.info("Move downloaded thumbnail file to $destPath for ${value.third.id}")
    createThumbnailResource(
        value.first.first,
        recordResource,
        digitalObjectResource,
        pathOnSftpServer
    )
    return updateRecord(
        value,
        value.third.status,
        thumbnailMessage = "youtube / vimeo thumbnail fetched"
    )
}
/**
 * Copies the dimensions of an oembed response onto the digital object resource.
 *
 * Width and height are each added as a plain literal (`EBUCORE.width` / `EBUCORE.height`)
 * only when the oembed response provides them.
 *
 * @param value message triple (model with header metadata, resources, report) being processed
 * @param oembedObject oembed response that may carry `width` and `height`
 * @return the same [value] instance; the change is made on the resource found in `value.second`
 * @throws IllegalStateException if no digital object resource can be found in `value.second`
 */
private fun addDimensionsToDigitalObject(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    oembedObject: OembedResponse
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    // Fail with a descriptive message instead of a bare null-pointer error.
    val digitalObjectResource = checkNotNull(getDigitalObjectResource(value.second)) {
        "No digital object resource found for ${value.third.id}"
    }
    oembedObject.width?.let { width ->
        digitalObjectResource.addLiteral(EBUCORE.width, ResourceFactory.createPlainLiteral(width.toString()))
    }
    oembedObject.height?.let { height ->
        digitalObjectResource.addLiteral(EBUCORE.height, ResourceFactory.createPlainLiteral(height.toString()))
    }
    return value
}
/**
 * Tells whether a thumbnail is still missing from the given resources.
 *
 * @param resources resources to inspect
 * @return true if no resource has the property `RICO.type` with the thumbnail type value
 */
private fun noThumbnailAttached(resources: List<Resource>): Boolean =
    !resources.any { resource -> resource.hasProperty(RICO.type, Constant.thumbnailRicoType) }
......
package org.memobase
import java.io.ByteArrayOutputStream
import java.io.FileNotFoundException
import java.io.FileOutputStream
import java.io.IOException
import java.net.HttpURLConnection
import java.net.URL
import java.net.URLEncoder
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
import org.apache.logging.log4j.LogManager
import org.memobase.exceptions.SftpClientException
import org.memobase.reports.OembedResponse
import org.memobase.sftp.SftpClient
/**
* Fetches preview images for videos on Vimeo or Youtube
* Queries oembed API for videos on Vimeo or Youtube and downloads poster images
*/
class PreviewImageHandler(private val sftpClient: SftpClient) {
class RemoteResourceHandler(private val sftpClient: SftpClient) {
private val log = LogManager.getLogger("MediaLinker")
companion object {
......@@ -39,9 +43,48 @@ class PreviewImageHandler(private val sftpClient: SftpClient) {
/**
 * Checks whether [url] points to a Youtube host.
 *
 * Accepts the bare domains `youtube.com` and `youtu.be` as well as any of their subdomains
 * (for example `www.youtube.com`). The comparison ignores case.
 *
 * @param url absolute URL; it is parsed with [URL], whose constructor throws on malformed input
 * @return true if the host of [url] is one of the Youtube domains or a subdomain of one
 */
fun isYoutubeUrl(url: String): Boolean {
    val host = URL(url).host
    return listOf("youtube.com", "youtu.be").any { domain ->
        // The leading dot keeps look-alike hosts such as "notyoutube.com" out.
        host.equals(domain, ignoreCase = true) || host.endsWith(".$domain", ignoreCase = true)
    }
}
/**
 * Requests an oembed document with HTTP GET and parses the response body.
 *
 * @param urlAsString complete URL of the oembed endpoint including its query string
 * @return the parsed response, or null if the request fails with an I/O error or
 *   [OembedResponse.fromJson] yields null
 */
private fun getOembedObject(urlAsString: String): OembedResponse? {
    val url = URL(urlAsString)
    return try {
        val connection = url.openConnection() as HttpURLConnection
        try {
            connection.requestMethod = "GET"
            val buffer = ByteArrayOutputStream()
            // `use` closes the response stream even when copying fails midway.
            connection.inputStream.use { response -> response.copyTo(buffer) }
            // Decode explicitly as UTF-8 rather than with the platform default charset.
            OembedResponse.fromJson(String(buffer.toByteArray(), StandardCharsets.UTF_8))
        } finally {
            connection.disconnect()
        }
    } catch (e: FileNotFoundException) {
        // HttpURLConnection signals a missing resource (e.g. HTTP 404) this way.
        null
    } catch (e: IOException) {
        // Any other I/O failure (unreachable host, rejected request) also means "nothing fetched".
        null
    }
}
/**
 * Looks up the oembed description of a Youtube video.
 *
 * The endpoint is called over HTTPS, matching the Vimeo lookup, so the request is not sent
 * in clear text.
 *
 * @param url URL of the video; it is URL-encoded (UTF-8) before being placed in the query string
 * @return the oembed response, or null if none could be fetched
 */
fun getYoutubeOembedObject(url: String): OembedResponse? {
    val encodedUrl = URLEncoder.encode(url, StandardCharsets.UTF_8.toString())
    return getOembedObject("https://www.youtube.com/oembed?url=$encodedUrl&format=json")
}
/**
 * Looks up the oembed description of a Vimeo video.
 *
 * @param url URL of the video; it is URL-encoded (UTF-8) before being placed in the query string
 * @return the oembed response, or null if none could be fetched
 */
fun getVimeoOembedObject(url: String): OembedResponse? {
    val encodedUrl = URLEncoder.encode(url, StandardCharsets.UTF_8.toString())
    return getOembedObject("https://vimeo.com/api/oembed.json?url=$encodedUrl")
}
}
private fun get(urlAsString: String): String? {
private fun getThumbnail(urlAsString: String): String? {
val url = URL(urlAsString)
return try {
val tempFile = Files.createTempFile("", ".jpg")
......@@ -64,39 +107,34 @@ class PreviewImageHandler(private val sftpClient: SftpClient) {
}
/**
* Get preview image from Youtube
* Get embedding information and preview image from Youtube
*
* @param videoURL URL of video
*
* @return Path to local file
* @return Pair with Oembed object and path to local file
*/
fun getFromYoutube(videoURL: String): String? {
val url = URL(videoURL)
val id = if (url.host.endsWith("youtube.com")) {
URL(videoURL).query.split("&").firstOrNull {
it.startsWith("v=")
}?.substring(2)
} else {
url.path.substring(1)
}
return if (id != null) {
get("https://img.youtube.com/vi/$id/hqdefault.jpg")
} else {
null
fun getFromYoutube(videoURL: String): Pair<OembedResponse, String?>? {
    // Without an oembed response there is nothing to return at all.
    val oembed = getYoutubeOembedObject(videoURL) ?: return null
    // The thumbnail is optional: the path stays null when the response names no thumbnail
    // or when fetching it yields null.
    val localThumbnailPath = oembed.thumbnail_url?.let { thumbnailUrl -> getThumbnail(thumbnailUrl) }
    return Pair(oembed, localThumbnailPath)
}
/**
* Get preview image from Vimeo
* Get embedding information and preview image from Vimeo
*
* @param videoURL URL of video
* @param width Width of preview image
*
* @return Path to local file
* @return Pair with Oembed object and path to local file
*/
fun getFromVimeo(videoURL: String, width: Int): String? {
val id = URL(videoURL).path.split("/").last()
return get("https://i.vimeocdn.com/vimeo/${id}_$width.jpg")
fun getFromVimeo(videoURL: String): Pair<OembedResponse, String?>? {
    // Without an oembed response there is nothing to return at all.
    val oembed = getVimeoOembedObject(videoURL) ?: return null
    // The thumbnail is optional: the path stays null when the response names no thumbnail
    // or when fetching it yields null.
    val localThumbnailPath = oembed.thumbnail_url?.let { thumbnailUrl -> getThumbnail(thumbnailUrl) }
    return Pair(oembed, localThumbnailPath)
}
/**
......
package org.memobase.reports
import com.beust.klaxon.Klaxon
/**
 * Response document of an oembed endpoint.
 *
 * Property names deliberately mirror the JSON keys (snake_case) because the document is
 * mapped onto this class by Klaxon in [fromJson].
 *
 * Every nullable property defaults to null so that the class can be constructed without
 * the optional values.
 * NOTE(review): presumably this is also what lets Klaxon map a response that omits a key,
 * as was already done for `url` — confirm against the Klaxon version in use.
 *
 * @property type resource type of the response
 * @property version oembed version of the response
 * @property title title of the resource, if given
 * @property author_name name of the resource's author, if given
 * @property author_url URL of the resource's author, if given
 * @property provider_name name of the provider, if given
 * @property provider_url URL of the provider, if given
 * @property thumbnail_url URL of a thumbnail image, if given
 * @property thumbnail_width width of the thumbnail, if given
 * @property thumbnail_height height of the thumbnail, if given
 * @property url URL of the resource itself, if given
 * @property width width of the embedded resource, if given
 * @property height height of the embedded resource, if given
 * @property html HTML snippet for embedding the resource, if given
 */
data class OembedResponse(
    val type: String,
    val version: String,
    val title: String? = null,
    val author_name: String? = null,
    val author_url: String? = null,
    val provider_name: String? = null,
    val provider_url: String? = null,
    val thumbnail_url: String? = null,
    val thumbnail_width: Int? = null,
    val thumbnail_height: Int? = null,
    val url: String? = null,
    val width: Int? = null,
    val height: Int? = null,
    val html: String? = null
) {
    companion object {
        /**
         * Parses an oembed JSON document.
         *
         * @param msg JSON text of the response
         * @return the mapped object, or null if Klaxon's parser returns null
         */
        fun fromJson(msg: String): OembedResponse? {
            return Klaxon().parse<OembedResponse>(msg)
        }
    }
}
import kotlin.test.assertNotNull
import kotlin.test.assertNull
import org.junit.jupiter.api.Test
import org.memobase.RemoteResourceHandler
/**
 * Exercises the oembed lookups of [RemoteResourceHandler].
 *
 * NOTE(review): the lookups open HTTP connections, so these tests appear to depend on the
 * live Youtube / Vimeo endpoints being reachable — confirm this is acceptable in CI.
 */
internal class TestRemoteResourceHandler {

    /** A Youtube video URL yields an oembed response. */
    @Test
    fun getYoutubeOembedObject() {
        val videoUrl = "https://www.youtube.com/watch?v=5ujk7IamcPI"
        assertNotNull(RemoteResourceHandler.getYoutubeOembedObject(videoUrl))
    }

    /** A Vimeo video URL yields an oembed response. */
    @Test
    fun getVimeoOembedObject() {
        val videoUrl = "https://vimeo.com/223023510"
        assertNotNull(RemoteResourceHandler.getVimeoOembedObject(videoUrl))
    }

    /** A URL with a misspelled host yields no oembed response. */
    @Test
    fun getOembedObjectWithInvalidUrl() {
        val misspelledUrl = "https://vieo.com/223023510"
        assertNull(RemoteResourceHandler.getVimeoOembedObject(misspelledUrl))
    }
}
......@@ -96,7 +96,7 @@ _:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb <https://www.ica.org/standards/R
_:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Identifier> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation> <https://memobase.ch/physical/Tanzarchiv-42858-43349-1> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#instantiates> <https://memobase.ch/record/Tanzarchiv-42858-43349> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/199629565" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/223023510" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#type> "digitalObject" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
\ No newline at end of file
......@@ -3,7 +3,9 @@
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#instantiates> <https://memobase.ch/record/Tanzarchiv-42858-43349> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation> <https://memobase.ch/digital/Tanzarchiv-42858-43349-1> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#type> "thumbnail" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/199629565" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#height> "360" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/223023510" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#width> "640" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#hasDerivedInstantiation> <https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B .
......
......@@ -44,6 +44,10 @@ mbr:Tanzarchiv-42858-43349
rico:type "main"
] ;
rico:heldBy mbcb:Tanzarchiv ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "43349" ;
rico:type "original"
] ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "Tanzarchiv-42858-43349" ;
rico:type "main"
......@@ -52,10 +56,6 @@ mbr:Tanzarchiv-42858-43349
rico:identifier "Tanzarchiv-43349" ;
rico:type "oldMemobase"
] ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "43349" ;
rico:type "original"
] ;
rico:isPartOf mbrs:Tanzarchiv-42858 ;
rico:recordResourceOrInstantiationIsSourceOfCreationRelation
_:b0 , _:b1 , _:b2 ;
......@@ -70,7 +70,9 @@ mbr:Tanzarchiv-42858-43349
mbdo:Tanzarchiv-42858-43349-1
a rico:Instantiation ;
ebucore:locator "https://vimeo.com/199629565" ;
ebucore:height "360" ;
ebucore:locator "https://vimeo.com/223023510" ;
ebucore:width "640" ;
rico:hasDerivedInstantiation <https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> ;
rico:identifiedBy [ a rico:Identifier ;
rico:identifier "Tanzarchiv-42858-43349-1" ;
......@@ -81,15 +83,15 @@ mbdo:Tanzarchiv-42858-43349-1
mbpo:Tanzarchiv-42858-43349-1 ;
rico:type "digitalObject" .
_:b0 a rico:CreationRelation ;
_:b2 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b0 ;
rico:name "Christophe Calpini"
_:b2 ;
rico:name "Katarzyna Gdaniec (Choreograf / Chorégraphe)"
] ;
rico:name "Komponisten / Compositeur" ;
rico:type "contributor" .
rico:name "Author" ;
rico:type "creator" .
mbpo:Tanzarchiv-42858-43349-1
a rico:Instantiation ;
......@@ -123,11 +125,11 @@ mbpo:Tanzarchiv-42858-43349-1
mbdo:Tanzarchiv-42858-43349-1 ;
rico:type "thumbnail" .
_:b2 a rico:CreationRelation ;
_:b0 a rico:CreationRelation ;
rico:creationRelationHasSource mbr:Tanzarchiv-42858-43349 ;
rico:creationRelationHasTarget [ a rico:CorporateBody ;
rico:agentIsTargetOfCreationRelation
_:b2 ;
_:b0 ;
rico:name "Compagnie Linga"
] ;
rico:name "Kompanie / Compagnie" ;
......@@ -138,7 +140,7 @@ _:b1 a rico:CreationRelation ;
rico:creationRelationHasTarget [ a rico:Person ;
rico:agentIsTargetOfCreationRelation
_:b1 ;
rico:name "Katarzyna Gdaniec (Choreograf / Chorégraphe)"
rico:name "Christophe Calpini"
] ;
rico:name "Author" ;
rico:type "creator" .
rico:name "Komponisten / Compositeur" ;
rico:type "contributor" .
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment