Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
Import Process
Media-Linker
Commits
7b86d221
Unverified
Commit
7b86d221
authored
Nov 16, 2020
by
Sebastian Schüpbach
Browse files
fetch oembed content from youtube / vimeo
parent
ba1ccb4f
Pipeline
#17367
failed with stages
in 2 minutes and 24 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/main/kotlin/Constant.kt
View file @
7b86d221
...
...
@@ -6,8 +6,6 @@ object Constant {
const
val
sftpBasePathPropertyName
=
"sftp.basePath"
const
val
extensionsPropertyName
=
"extensions"
const
val
vimeoThumbnailWidth
=
1000
const
val
rdfParserLang
=
"NTRIPLES"
const
val
digitalObject
=
"digitalObject"
...
...
src/main/kotlin/KafkaTopology.kt
View file @
7b86d221
...
...
@@ -33,6 +33,7 @@ import org.apache.logging.log4j.LogManager
import
org.memobase.rdf.EBUCORE
import
org.memobase.rdf.RDF
import
org.memobase.rdf.RICO
import
org.memobase.reports.OembedResponse
import
org.memobase.reports.ReportMessages
import
org.memobase.reports.ReportStatus
import
org.memobase.settings.SettingsLoader
...
...
@@ -45,7 +46,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
private
val
log
=
LogManager
.
getLogger
(
"KafkaTopology"
)
private
val
sftpClient
=
SftpClient
(
settings
.
sftpSettings
)
private
val
previewImageHandler
=
PreviewImag
eHandler
(
sftpClient
)
private
val
previewImageHandler
=
RemoteResourc
eHandler
(
sftpClient
)
private
val
sftpBasePath
=
appSettings
.
getProperty
(
Constant
.
sftpBasePathPropertyName
)
private
val
fileExtensions
=
appSettings
.
getProperty
(
Constant
.
extensionsPropertyName
).
split
(
","
)
private
val
reportingTopic
=
settings
.
processReportTopic
...
...
@@ -167,16 +168,16 @@ class KafkaTopology(private val settings: SettingsLoader) {
log
.
warn
(
"No valid locator url found for ${value.third.id}"
)
return
updateRecord
(
value
,
ReportStatus
.
failure
,
thumbnailMessage
=
"no valid locator url"
)
}
PreviewImag
eHandler
.
isVimeoUrl
(
locator
)
->
{
RemoteResourc
eHandler
.
isVimeoUrl
(
locator
)
->
{
log
.
info
(
"Trying to download thumbnail file on vimeo for ${value.third.id}"
)
this
.
previewImageHandler
.
getFromVimeo
(
locator
,
Constant
.
vimeoThumbnailWidth
)
this
.
previewImageHandler
.
getFromVimeo
(
locator
)
?:
return
updateRecord
(
value
,
ReportStatus
.
failure
,
thumbnailMessage
=
"couldn't fetch vimeo thumbnail"
)
}
PreviewImag
eHandler
.
isYoutubeUrl
(
locator
)
->
{
RemoteResourc
eHandler
.
isYoutubeUrl
(
locator
)
->
{
log
.
info
(
"Trying to download thumbnail file on youtube for ${value.third.id}"
)
this
.
previewImageHandler
.
getFromYoutube
(
locator
)
?:
return
updateRecord
(
...
...
@@ -194,39 +195,69 @@ class KafkaTopology(private val settings: SettingsLoader) {
)
}
}.
let
{
val
destPath
=
"$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}/${
recordResource
.
uri
.
split
(
"/"
).
last
()
}.
jpg
"
val
pathOnSftpServer
=
previewImageHandler
.
moveFileToSFTP
(
it
,
destPath
)
if
(
pathOnSftpServer
!=
null
)
{
log
.
info
(
"Move downloaded thumbnail file to $destPath for ${value.third.id}"
)
createThumbnailResource
(
value
.
first
.
first
,
recordResource
,
digitalObjectResource
,
pathOnSftpServer
)
return
updateRecord
(
value
,
value
.
third
.
status
,
thumbnailMessage
=
"youtube / vimeo thumbnail fetched"
)
}
else
{
log
.
warn
(
"Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}"
)
return
updateRecord
(
value
,
ReportStatus
.
failure
,
thumbnailMessage
=
"upload of youtube / vimeo thumbnail to sFTP server failed"
)
}
val
enrichedValue
=
addDimensionsToDigitalObject
(
value
,
it
.
first
)
return
it
.
second
?.
let
{
locator
->
addLocalThumbnail
(
enrichedValue
,
recordResource
,
digitalObjectResource
,
locator
)
}
?:
enrichedValue
}
}
}
return
value
}
/**
 * Hands a locally stored thumbnail to [previewImageHandler] for transfer to the sFTP server
 * and, on success, registers it as thumbnail resource of the record.
 *
 * The destination is built from the sFTP base path, the record set id, the thumbnail folder
 * name and the last path segment of the record URI, with a `.jpg` suffix.
 *
 * @param value Record triple (model with header metadata, resources, report) being processed
 * @param recordResource Resource of the record the thumbnail belongs to
 * @param digitalObjectResource Resource of the digital object the thumbnail is derived from
 * @param pathToLocalFile Path of the downloaded thumbnail on the local file system
 *
 * @return Result of `updateRecord`: the current report status is kept when the transfer
 * yields a path, a failure status is reported when it yields `null`
 */
private fun addLocalThumbnail(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    recordResource: Resource,
    digitalObjectResource: Resource,
    pathToLocalFile: String
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    val destPath =
        "$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}/${recordResource.uri.split("/").last()}.jpg"
    val uploadedPath = previewImageHandler.moveFileToSFTP(pathToLocalFile, destPath)

    // Guard clause: without a path on the server there is nothing to link.
    if (uploadedPath == null) {
        log.warn("Couldn't move downloaded thumbnail file to $destPath for ${value.third.id}")
        return updateRecord(
            value,
            ReportStatus.failure,
            thumbnailMessage = "upload of youtube / vimeo thumbnail to sFTP server failed"
        )
    }

    log.info("Move downloaded thumbnail file to $destPath for ${value.third.id}")
    createThumbnailResource(value.first.first, recordResource, digitalObjectResource, uploadedPath)
    return updateRecord(value, value.third.status, thumbnailMessage = "youtube / vimeo thumbnail fetched")
}
/**
 * Copies the width and height reported by an oEmbed response onto the digital object
 * resource of the record, as plain literals under [EBUCORE.width] / [EBUCORE.height].
 *
 * A dimension missing from the oEmbed response is skipped. If no digital object resource
 * can be resolved from the record's resources, a warning is logged and the record is
 * returned untouched instead of failing with a NullPointerException.
 *
 * @param value Record triple (model with header metadata, resources, report) being processed
 * @param oembedObject Parsed oEmbed response of the remote video
 *
 * @return The same [value] instance; statements are added to its resources in place
 */
private fun addDimensionsToDigitalObject(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>,
    oembedObject: OembedResponse
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    val digitalObjectResource = getDigitalObjectResource(value.second)
    if (digitalObjectResource == null) {
        log.warn("No digital object resource found for ${value.third.id}, oembed dimensions not added")
        return value
    }

    oembedObject.width?.let { width ->
        digitalObjectResource.addLiteral(EBUCORE.width, ResourceFactory.createPlainLiteral(width.toString()))
    }
    oembedObject.height?.let { height ->
        digitalObjectResource.addLiteral(EBUCORE.height, ResourceFactory.createPlainLiteral(height.toString()))
    }
    return value
}
/**
 * Checks whether the given resources are free of thumbnails.
 *
 * @param resources Resources to inspect
 *
 * @return `true` if no resource has a [RICO.type] property with the value
 * [Constant.thumbnailRicoType] (also for an empty list), `false` otherwise
 */
private fun noThumbnailAttached(resources: List<Resource>): Boolean =
    !resources.any { resource -> resource.hasProperty(RICO.type, Constant.thumbnailRicoType) }
...
...
src/main/kotlin/
PreviewImag
eHandler.kt
→
src/main/kotlin/
RemoteResourc
eHandler.kt
View file @
7b86d221
package
org.memobase
import
java.io.ByteArrayOutputStream
import
java.io.FileNotFoundException
import
java.io.FileOutputStream
import
java.io.IOException
import
java.net.HttpURLConnection
import
java.net.URL
import
java.net.URLEncoder
import
java.nio.charset.StandardCharsets
import
java.nio.file.Files
import
java.nio.file.Paths
import
org.apache.logging.log4j.LogManager
import
org.memobase.exceptions.SftpClientException
import
org.memobase.reports.OembedResponse
import
org.memobase.sftp.SftpClient
/**
*
Fetches preview images
for videos on Vimeo or Youtube
*
Queries oembed API
for videos on Vimeo or Youtube
and downloads poster images
*/
class
PreviewImag
eHandler
(
private
val
sftpClient
:
SftpClient
)
{
class
RemoteResourc
eHandler
(
private
val
sftpClient
:
SftpClient
)
{
private
val
log
=
LogManager
.
getLogger
(
"MediaLinker"
)
companion
object
{
...
...
@@ -39,9 +43,48 @@ class PreviewImageHandler(private val sftpClient: SftpClient) {
/**
 * Checks whether a URL points to YouTube.
 *
 * The host matches if it equals `youtube.com` or `youtu.be` or is a subdomain of one of
 * them (e.g. `www.youtube.com`, `m.youtube.com`). The comparison ignores case and does
 * not depend on the default locale.
 *
 * @param url Absolute URL to check
 *
 * @return `true` if the host belongs to YouTube
 * @throws java.net.MalformedURLException if [url] cannot be parsed as a URL
 */
fun isYoutubeUrl(url: String): Boolean {
    val host = URL(url).host
    return listOf("youtube.com", "youtu.be").any { domain ->
        // The leading dot keeps look-alike hosts such as "notyoutube.com" out.
        host.equals(domain, ignoreCase = true) || host.endsWith(".$domain", ignoreCase = true)
    }
}
/**
 * Sends a GET request to an oEmbed endpoint and parses the JSON response.
 *
 * Any [IOException] raised while connecting or reading yields `null`. This includes the
 * [java.io.FileNotFoundException] case that was handled before, and now also other
 * HTTP error statuses, unreachable hosts and malformed URLs. The response stream and
 * the connection are released in every case, and the body is decoded as UTF-8 rather
 * than with the platform default charset.
 *
 * @param urlAsString Complete oEmbed request URL
 *
 * @return Parsed response, or `null` if the request failed or
 * [OembedResponse.fromJson] returned `null`
 */
private fun getOembedObject(urlAsString: String): OembedResponse? {
    val responseBytes = try {
        val connection = URL(urlAsString).openConnection() as HttpURLConnection
        try {
            connection.requestMethod = "GET"
            ByteArrayOutputStream().use { buffer ->
                connection.inputStream.use { body -> body.copyTo(buffer) }
                buffer.toByteArray()
            }
        } finally {
            connection.disconnect()
        }
    } catch (e: IOException) {
        return null
    }
    return OembedResponse.fromJson(String(responseBytes, StandardCharsets.UTF_8))
}
/**
 * Queries the YouTube oEmbed endpoint for a video.
 *
 * The endpoint is addressed via HTTPS, like the Vimeo one, so the request does not rely
 * on an http-to-https redirect.
 *
 * @param url URL of the video on YouTube; it is URL-encoded before being sent
 *
 * @return Parsed oEmbed response, or `null` if `getOembedObject` returns none
 */
fun getYoutubeOembedObject(url: String): OembedResponse? {
    val encodedVideoUrl = URLEncoder.encode(url, StandardCharsets.UTF_8.toString())
    return getOembedObject("https://www.youtube.com/oembed?url=$encodedVideoUrl&format=json")
}
/**
 * Queries the Vimeo oEmbed endpoint for a video.
 *
 * @param url URL of the video on Vimeo; it is URL-encoded before being sent
 *
 * @return Parsed oEmbed response, or `null` if `getOembedObject` returns none
 */
fun getVimeoOembedObject(url: String): OembedResponse? {
    val encodedVideoUrl = URLEncoder.encode(url, StandardCharsets.UTF_8.toString())
    return getOembedObject("https://vimeo.com/api/oembed.json?url=$encodedVideoUrl")
}
}
private
fun
get
(
urlAsString
:
String
):
String
?
{
private
fun
get
Thumbnail
(
urlAsString
:
String
):
String
?
{
val
url
=
URL
(
urlAsString
)
return
try
{
val
tempFile
=
Files
.
createTempFile
(
""
,
".jpg"
)
...
...
@@ -64,39 +107,34 @@ class PreviewImageHandler(private val sftpClient: SftpClient) {
}
/**
* Get preview image from Youtube
* Get
embedding information and
preview image from Youtube
*
* @param videoURL URL of video
*
* @return Path to local file
* @return Pa
ir with Oembed object and pa
th to local file
*/
fun
getFromYoutube
(
videoURL
:
String
):
String
?
{
val
url
=
URL
(
videoURL
)
val
id
=
if
(
url
.
host
.
endsWith
(
"youtube.com"
))
{
URL
(
videoURL
).
query
.
split
(
"&"
).
firstOrNull
{
it
.
startsWith
(
"v="
)
}
?.
substring
(
2
)
}
else
{
url
.
path
.
substring
(
1
)
}
return
if
(
id
!=
null
)
{
get
(
"https://img.youtube.com/vi/$id/hqdefault.jpg"
)
}
else
{
null
/**
 * Fetches the oEmbed description of a YouTube video and, if it names a thumbnail,
 * passes that thumbnail URL to `getThumbnail`.
 *
 * @param videoURL URL of video
 *
 * @return Pair of oEmbed object and the result of `getThumbnail` (`null` when the
 * response has no thumbnail URL), or `null` if no oEmbed object could be obtained
 */
fun getFromYoutube(videoURL: String): Pair<OembedResponse, String?>? {
    val oembed = getYoutubeOembedObject(videoURL) ?: return null
    val thumbnailPath = oembed.thumbnail_url?.let { thumbnailUrl -> getThumbnail(thumbnailUrl) }
    return Pair(oembed, thumbnailPath)
}
/**
* Get preview image from Vimeo
* Get
embedding information and
preview image from Vimeo
*
* @param videoURL URL of video
* @param width Width of preview image
*
* @return Path to local file
* @return Pa
ir with Oembed object and pa
th to local file
*/
fun
getFromVimeo
(
videoURL
:
String
,
width
:
Int
):
String
?
{
val
id
=
URL
(
videoURL
).
path
.
split
(
"/"
).
last
()
return
get
(
"https://i.vimeocdn.com/vimeo/${id}_$width.jpg"
)
/**
 * Fetches the oEmbed description of a Vimeo video and, if it names a thumbnail,
 * passes that thumbnail URL to `getThumbnail`.
 *
 * @param videoURL URL of video
 *
 * @return Pair of oEmbed object and the result of `getThumbnail` (`null` when the
 * response has no thumbnail URL), or `null` if no oEmbed object could be obtained
 */
fun getFromVimeo(videoURL: String): Pair<OembedResponse, String?>? {
    val oembed = getVimeoOembedObject(videoURL) ?: return null
    val thumbnailPath = oembed.thumbnail_url?.let { thumbnailUrl -> getThumbnail(thumbnailUrl) }
    return Pair(oembed, thumbnailPath)
}
/**
...
...
src/main/kotlin/reports/OembedResponse.kt
0 → 100644
View file @
7b86d221
package
org.memobase.reports
import
com.beust.klaxon.Klaxon
/**
 * Response of an oEmbed endpoint.
 *
 * Property names are kept in snake_case because they are part of this class's public
 * interface; presumably they mirror the JSON keys so that Klaxon can bind them by name
 * (no explicit name mapping is declared here).
 *
 * Only [type] and [version] are mandatory. Every other field is nullable and defaults
 * to `null`, so the class can also be instantiated when a provider omits it.
 * NOTE(review): this assumes Klaxon, like the existing default on [url] suggests,
 * cannot fill a constructor parameter that is absent from the JSON unless it has a
 * default value; confirm against the Klaxon version in use.
 */
data class OembedResponse(
    val type: String,
    val version: String,
    val title: String? = null,
    val author_name: String? = null,
    val author_url: String? = null,
    val provider_name: String? = null,
    val provider_url: String? = null,
    val thumbnail_url: String? = null,
    val thumbnail_width: Int? = null,
    val thumbnail_height: Int? = null,
    val url: String? = null,
    val width: Int? = null,
    val height: Int? = null,
    val html: String? = null
) {
    companion object {
        /**
         * Parses an oEmbed JSON document.
         *
         * @param msg JSON text of the response
         *
         * @return Parsed object, or `null` if Klaxon yields none
         */
        fun fromJson(msg: String): OembedResponse? {
            return Klaxon().parse<OembedResponse>(msg)
        }
    }
}
src/test/kotlin/TestRemoteResourceHandler.kt
0 → 100644
View file @
7b86d221
import
kotlin.test.assertNotNull
import
kotlin.test.assertNull
import
org.junit.jupiter.api.Test
import
org.memobase.RemoteResourceHandler
/**
 * Tests for the oEmbed lookups of [RemoteResourceHandler].
 *
 * All tests call the public oEmbed endpoints of the providers and therefore need
 * network access.
 */
internal class TestRemoteResourceHandler {

    /** A YouTube video URL yields an oEmbed object. */
    @Test
    fun getYoutubeOembedObject() {
        assertNotNull(
            RemoteResourceHandler.getYoutubeOembedObject("https://www.youtube.com/watch?v=5ujk7IamcPI")
        )
    }

    /** A Vimeo video URL yields an oEmbed object. */
    @Test
    fun getVimeoOembedObject() {
        assertNotNull(
            RemoteResourceHandler.getVimeoOembedObject("https://vimeo.com/223023510")
        )
    }

    /** A video URL with a misspelled host yields no oEmbed object. */
    @Test
    fun getOembedObjectWithInvalidUrl() {
        assertNull(
            RemoteResourceHandler.getVimeoOembedObject("https://vieo.com/223023510")
        )
    }
}
src/test/resources/data/input4.nt
View file @
7b86d221
...
...
@@ -96,7 +96,7 @@ _:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb <https://www.ica.org/standards/R
_:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Identifier> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation> <https://memobase.ch/physical/Tanzarchiv-42858-43349-1> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#instantiates> <https://memobase.ch/record/Tanzarchiv-42858-43349> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/
199629565
" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/
223023510
" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B78e9a20fX2De01eX2D4df2X2D9db0X2De2f8846167cb .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#type> "digitalObject" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
\ No newline at end of file
src/test/resources/data/output4.nt
View file @
7b86d221
...
...
@@ -3,7 +3,9 @@
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#instantiates> <https://memobase.ch/record/Tanzarchiv-42858-43349> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#isDerivedFromInstantiation> <https://memobase.ch/digital/Tanzarchiv-42858-43349-1> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> <https://www.ica.org/standards/RiC/ontology#type> "thumbnail" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/199629565" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#height> "360" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#locator> "https://vimeo.com/223023510" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#width> "640" .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.ica.org/standards/RiC/ontology#Instantiation> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#hasDerivedInstantiation> <https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived> .
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1> <https://www.ica.org/standards/RiC/ontology#identifiedBy> _:B .
...
...
src/test/resources/data/turtle-output4.ttl
View file @
7b86d221
...
...
@@ -44,6 +44,10 @@ mbr:Tanzarchiv-42858-43349
rico:
type
"main"
]
;
rico:
heldBy
mbcb:
Tanzarchiv
;
rico:
identifiedBy
[
a
rico:
Identifier
;
rico:
identifier
"43349"
;
rico:
type
"original"
]
;
rico:
identifiedBy
[
a
rico:
Identifier
;
rico:
identifier
"Tanzarchiv-42858-43349"
;
rico:
type
"main"
...
...
@@ -52,10 +56,6 @@ mbr:Tanzarchiv-42858-43349
rico:
identifier
"Tanzarchiv-43349"
;
rico:
type
"oldMemobase"
]
;
rico:
identifiedBy
[
a
rico:
Identifier
;
rico:
identifier
"43349"
;
rico:
type
"original"
]
;
rico:
isPartOf
mbrs:
Tanzarchiv-42858
;
rico:
recordResourceOrInstantiationIsSourceOfCreationRelation
_:
b0
,
_:
b1
,
_:
b2
;
...
...
@@ -70,7 +70,9 @@ mbr:Tanzarchiv-42858-43349
mbdo:
Tanzarchiv-42858-43349-1
a
rico:
Instantiation
;
ebucore:
locator
"https://vimeo.com/199629565"
;
ebucore:
height
"360"
;
ebucore:
locator
"https://vimeo.com/223023510"
;
ebucore:
width
"640"
;
rico:
hasDerivedInstantiation
<https://memobase.ch/digital/Tanzarchiv-42858-43349-1/derived>
;
rico:
identifiedBy
[
a
rico:
Identifier
;
rico:
identifier
"Tanzarchiv-42858-43349-1"
;
...
...
@@ -81,15 +83,15 @@ mbdo:Tanzarchiv-42858-43349-1
mbpo:
Tanzarchiv-42858-43349-1
;
rico:
type
"digitalObject"
.
_:
b
0
a
rico:
CreationRelation
;
_:
b
2
a
rico:
CreationRelation
;
rico:
creationRelationHasSource
mbr:
Tanzarchiv-42858-43349
;
rico:
creationRelationHasTarget
[
a
rico:
Person
;
rico:
agentIsTargetOfCreationRelation
_:
b
0
;
rico:
name
"
Christophe Calpini
"
_:
b
2
;
rico:
name
"
Katarzyna Gdaniec (Choreograf / Chorégraphe)
"
]
;
rico:
name
"
Komponisten / Compositeu
r"
;
rico:
type
"c
ontribu
tor"
.
rico:
name
"
Autho
r"
;
rico:
type
"c
rea
tor"
.
mbpo:
Tanzarchiv-42858-43349-1
a
rico:
Instantiation
;
...
...
@@ -123,11 +125,11 @@ mbpo:Tanzarchiv-42858-43349-1
mbdo:
Tanzarchiv-42858-43349-1
;
rico:
type
"thumbnail"
.
_:
b
2
a
rico:
CreationRelation
;
_:
b
0
a
rico:
CreationRelation
;
rico:
creationRelationHasSource
mbr:
Tanzarchiv-42858-43349
;
rico:
creationRelationHasTarget
[
a
rico:
CorporateBody
;
rico:
agentIsTargetOfCreationRelation
_:
b
2
;
_:
b
0
;
rico:
name
"Compagnie Linga"
]
;
rico:
name
"Kompanie / Compagnie"
;
...
...
@@ -138,7 +140,7 @@ _:b1 a rico:CreationRelation ;
rico:
creationRelationHasTarget
[
a
rico:
Person
;
rico:
agentIsTargetOfCreationRelation
_:
b1
;
rico:
name
"
Katarzyna Gdaniec (Choreograf / Chorégraphe)
"
rico:
name
"
Christophe Calpini
"
]
;
rico:
name
"
Autho
r"
;
rico:
type
"c
rea
tor"
.
rico:
name
"
Komponisten / Compositeu
r"
;
rico:
type
"c
ontribu
tor"
.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment