Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
Import Process
Media-Linker
Commits
a098d053
Unverified
Commit
a098d053
authored
Oct 23, 2020
by
Sebastian Schüpbach
Browse files
add youtube / vimeo thumbnail fetching
parent
01fb9b18
Pipeline
#16006
passed with stages
in 4 minutes and 47 seconds
Changes
9
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
src/main/kotlin/Constant.kt
View file @
a098d053
...
...
@@ -2,10 +2,12 @@ package org.memobase
object
Constant
{
const
val
mediaFolderName
=
"media"
const
val
thumbnailFolderName
=
"thumbnails"
const
val
thumbnailFolderName
=
"thumbnails"
const
val
sftpBasePathPropertyName
=
"sftp.basePath"
const
val
extensionsPropertyName
=
"extensions"
const
val
vimeoThumbnailWidth
=
1000
const
val
rdfParserLang
=
"NTRIPLES"
const
val
digitalObject
=
"digitalObject"
...
...
@@ -13,5 +15,4 @@ object Constant {
const
val
thumbnailRicoType
=
"thumbnail"
const
val
sftpPathPrefix
=
"sftp:"
}
\ No newline at end of file
}
src/main/kotlin/KafkaTopology.kt
View file @
a098d053
...
...
@@ -18,6 +18,8 @@
package
org.memobase
import
java.io.StringReader
import
java.io.StringWriter
import
org.apache.jena.rdf.model.Model
import
org.apache.jena.rdf.model.ModelFactory
import
org.apache.jena.rdf.model.Resource
...
...
@@ -33,13 +35,12 @@ import org.memobase.settings.SettingsLoader
import
org.memobase.sftp.SftpClient
import
settings.HeaderExtractionTransformSupplier
import
settings.HeaderMetadata
import
java.io.StringReader
import
java.io.StringWriter
class
KafkaTopology
(
private
val
settings
:
SettingsLoader
)
{
private
val
appSettings
=
settings
.
appSettings
private
val
sftpClient
=
SftpClient
(
settings
.
sftpSettings
)
private
val
previewImageHandler
=
PreviewImageHandler
(
sftpClient
)
private
val
sftpBasePath
=
appSettings
.
getProperty
(
Constant
.
sftpBasePathPropertyName
)
private
val
fileExtensions
=
appSettings
.
getProperty
(
Constant
.
extensionsPropertyName
).
split
(
","
)
private
val
reportingTopic
=
settings
.
processReportTopic
...
...
@@ -68,7 +69,6 @@ class KafkaTopology(private val settings: SettingsLoader) {
val
updateDigitalObjects
=
instantiationBranch
[
0
]
.
mapValues
{
readOnlyKey
,
value
->
enrichSftpLocator
(
readOnlyKey
,
value
,
Constant
.
mediaFolderName
)
}
updateDigitalObjects
.
filterNot
{
_
,
value
->
value
.
third
.
status
==
ReportStatus
.
failure
}
// failed records are deleted.
.
mapValues
{
value
->
...
...
@@ -85,6 +85,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
instantiationBranch
[
1
]
.
filterNot
{
_
,
value
->
value
.
third
.
status
==
ReportStatus
.
failure
}
// failed records are deleted.
.
mapValues
{
value
->
fetchThumbnailForYoutubeOrVimeoFile
(
value
)
}
.
mapValues
{
value
->
val
out
=
StringWriter
()
value
.
first
.
first
.
write
(
out
,
Constant
.
rdfParserLang
)
...
...
@@ -99,6 +100,58 @@ class KafkaTopology(private val settings: SettingsLoader) {
return
builder
}
/**
 * Tries to fetch a preview image from Youtube or Vimeo for a record that has no thumbnail yet.
 *
 * The image is only fetched when none of the resources carries the thumbnail rico:type, and
 * when both a record and a digital object with a locator are present. The downloaded file is
 * moved to `<sftpBasePath>/<recordSetId>/<thumbnailFolderName>/<digitalObjectId>.jpg` and a
 * thumbnail resource pointing to that path is added to the model.
 *
 * @param value model with header metadata, the subjects of the model and the current report
 *
 * @return the input with an amended report: the incoming status with a "fetched" note when a
 * thumbnail was stored, status failure when the upload to the sFTP server failed, and status
 * success with a "no additional thumbnails" note in every other case
 */
private fun fetchThumbnailForYoutubeOrVimeoFile(
    value: Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report>
): Triple<Pair<Model, HeaderMetadata>, List<Resource>, Report> {
    val resources = value.second
    val report = value.third

    // Result for every path on which no thumbnail has been added.
    fun nothingFetched() = value.copy(
        third = Report(
            report.id,
            ReportStatus.success,
            report.message + "; no additional youtube / vimeo thumbnails fetched"
        )
    )

    if (!noThumbnailAttached(resources)) {
        return nothingFetched()
    }
    val record = resources.firstOrNull { it.hasProperty(RDF.type, RICO.Record) }
        ?: return nothingFetched()
    // The digital object is marked with a rico:type literal (as in enrichSftpLocator),
    // not with an rdf:type, so the lookup has to use RICO.type.
    val digitalObject = resources.firstOrNull { it.hasProperty(RICO.type, Constant.digitalObject) }
        ?: return nothingFetched()
    // A digital object without a locator cannot point to Youtube or Vimeo.
    val locator = digitalObject.getProperty(EBUCORE.locator)?.string
        ?: return nothingFetched()
    // The last segment of the digital object's URI serves as file name; it is the same
    // identifier createThumbnailResource uses for the URI of the thumbnail resource.
    val fileName = digitalObject.uri?.substringAfterLast("/")
        ?: return nothingFetched()

    val localFile = when {
        PreviewImageHandler.isVimeoUrl(locator) ->
            previewImageHandler.getFromVimeo(locator, Constant.vimeoThumbnailWidth)
        PreviewImageHandler.isYoutubeUrl(locator) ->
            previewImageHandler.getFromYoutube(locator)
        else -> null
    } ?: return nothingFetched()

    val pathOnSftpServer = previewImageHandler.moveFileToSFTP(
        localFile,
        "$sftpBasePath/${value.first.second.recordSetId}/${Constant.thumbnailFolderName}/$fileName.jpg"
    )
    if (pathOnSftpServer == null) {
        return value.copy(
            third = Report(
                report.id,
                ReportStatus.failure,
                report.message + "; youtube / vimeo thumbnail couldn't be uploaded to Sftp server"
            )
        )
    }

    createThumbnailResource(value.first.first, record, digitalObject, pathOnSftpServer)
    return value.copy(
        third = Report(
            report.id,
            report.status,
            report.message + "; youtube / vimeo thumbnail fetched"
        )
    )
}
/**
 * Checks that no thumbnail has been attached yet.
 *
 * @param resources resources to be inspected
 *
 * @return true if not a single resource carries the thumbnail rico:type
 */
private fun noThumbnailAttached(resources: List<Resource>): Boolean =
    resources.all { resource -> !resource.hasProperty(RICO.type, Constant.thumbnailRicoType) }
/**
 * Pairs the input with the list of all subjects found in its model.
 *
 * @param input model together with its header metadata
 *
 * @return the unchanged input and the subjects of the model
 */
private fun extractSubjects(input: Pair<Model, HeaderMetadata>): Pair<Pair<Model, HeaderMetadata>, List<Resource>> {
    val model = input.first
    val subjects: List<Resource> = model.listSubjects().toList()
    return Pair(input, subjects)
}
...
...
@@ -123,12 +176,31 @@ class KafkaTopology(private val settings: SettingsLoader) {
return
Pair
(
model
,
data
.
second
)
}
/**
 * Adds a thumbnail instantiation to the model and links it to record and digital object.
 *
 * The URI of the thumbnail is built from the last segment of the digital object's URI.
 *
 * @param data model the thumbnail resource is created in
 * @param record record which is instantiated by the thumbnail
 * @param digitalObject digital object the thumbnail is derived from
 * @param locator value of the thumbnail's locator property
 */
private fun createThumbnailResource(data: Model, record: Resource, digitalObject: Resource, locator: String) {
    val objectId = digitalObject.uri.substringAfterLast("/")
    val derived = data.createResource("https://memobase.ch/digital/$objectId/derived")

    // Description of the thumbnail itself.
    derived
        .addProperty(RDF.type, RICO.Instantiation)
        .addProperty(RICO.type, Constant.thumbnailRicoType)
        .addProperty(EBUCORE.locator, ResourceFactory.createPlainLiteral(locator))

    // Links between digital object and thumbnail, in both directions.
    digitalObject.addProperty(RICO.hasDerivedInstantiation, derived)
    derived.addProperty(RICO.isDerivedFromInstantiation, digitalObject)

    // Links between record and thumbnail, in both directions.
    record.addProperty(RICO.hasInstantiation, derived)
    derived.addProperty(RICO.instantiates, record)
}
private
fun
enrichSftpLocator
(
key
:
String
,
data
:
Triple
<
Pair
<
Model
,
HeaderMetadata
>,
List
<
Resource
>,
Report
>,
type
:
String
):
Triple
<
Pair
<
Model
,
HeaderMetadata
>,
List
<
Resource
>,
Report
>
{
var
link
=
""
var
link
:
String
return
data
.
second
.
firstOrNull
{
it
.
hasProperty
(
RDF
.
type
,
RICO
.
Record
)
}.
let
{
record
->
if
(
record
!=
null
)
{
data
.
second
.
firstOrNull
{
it
.
hasProperty
(
RICO
.
type
,
Constant
.
digitalObject
)
}.
let
{
digitalObject
->
...
...
@@ -153,17 +225,7 @@ class KafkaTopology(private val settings: SettingsLoader) {
digitalObject
.
addLiteral
(
EBUCORE
.
locator
,
literal
)
data
.
first
.
first
.
createLiteral
(
digitalObject
.
toString
(),
true
)
}
else
if
(
type
==
Constant
.
thumbnailFolderName
)
{
val
thumbnail
=
data
.
first
.
first
.
createResource
(
"https://memobase.ch/digital/${digitalObject.uri.substringAfterLast("
/
")}/derived"
)
val
literal
=
ResourceFactory
.
createPlainLiteral
(
link
)
thumbnail
.
addProperty
(
RDF
.
type
,
RICO
.
Instantiation
)
thumbnail
.
addProperty
(
RICO
.
type
,
Constant
.
thumbnailRicoType
)
thumbnail
.
addProperty
(
EBUCORE
.
locator
,
literal
)
digitalObject
.
addProperty
(
RICO
.
hasDerivedInstantiation
,
thumbnail
)
thumbnail
.
addProperty
(
RICO
.
isDerivedFromInstantiation
,
digitalObject
)
record
.
addProperty
(
RICO
.
hasInstantiation
,
thumbnail
)
thumbnail
.
addProperty
(
RICO
.
instantiates
,
record
)
createThumbnailResource
(
data
.
first
.
first
,
record
,
digitalObject
,
link
)
}
return
Triple
(
data
.
first
,
...
...
src/main/kotlin/PreviewImageHandler.kt
0 → 100644
View file @
a098d053
package
org.memobase
import
java.io.FileNotFoundException
import
java.io.FileOutputStream
import
java.io.IOException
import
java.net.HttpURLConnection
import
java.net.URL
import
java.nio.file.Files
import
java.nio.file.Paths
import
org.apache.logging.log4j.LogManager
import
org.memobase.sftp.SftpClient
/**
 * Fetches preview images for videos on Vimeo or Youtube and moves them to the sFTP server.
 */
class PreviewImageHandler(private val sftpClient: SftpClient) {
    private val log = LogManager.getLogger("MediaLinker")

    companion object {
        private val vimeoDomains = listOf("vimeo.com")
        private val youtubeDomains = listOf("youtube.com", "youtu.be")

        /**
         * Checks if URL points to Vimeo
         *
         * @param url URL to be scrutinised
         *
         * @return true if URL points to vimeo.com or one of its subdomains, false otherwise
         * (also if the value is not a valid URL)
         */
        fun isVimeoUrl(url: String): Boolean = hostBelongsTo(url, vimeoDomains)

        /**
         * Checks if URL points to Youtube
         *
         * @param url URL to be scrutinised
         *
         * @return true if URL points to youtube.com, youtu.be or one of their subdomains,
         * false otherwise (also if the value is not a valid URL)
         */
        fun isYoutubeUrl(url: String): Boolean = hostBelongsTo(url, youtubeDomains)

        /**
         * Parses a URL without throwing: values which are no valid URLs (e.g. plain file
         * paths or unknown protocols) yield null.
         */
        private fun parseUrl(url: String): URL? =
            try {
                URL(url)
            } catch (ex: java.net.MalformedURLException) {
                null
            }

        /**
         * Checks whether the host of the URL is one of the domains or a subdomain of them
         * (e.g. www.youtube.com). The comparison ignores case.
         */
        private fun hostBelongsTo(url: String, domains: List<String>): Boolean {
            val host = parseUrl(url)?.host ?: return false
            return domains.any { domain ->
                host.equals(domain, ignoreCase = true) || host.endsWith(".$domain", ignoreCase = true)
            }
        }
    }

    /**
     * Downloads a file into a local temporary file.
     *
     * @param urlAsString URL of the file to be downloaded
     *
     * @return Path to the temporary file or null if the download failed. In the latter case
     * the temporary file is removed again.
     */
    private fun get(urlAsString: String): String? {
        val tempFile = try {
            Files.createTempFile("", ".jpg")
        } catch (ex: IOException) {
            log.error("Creating temporary thumbnail file failed: ${ex.message}")
            return null
        }
        val downloaded = try {
            val connection = URL(urlAsString).openConnection() as HttpURLConnection
            try {
                connection.requestMethod = "GET"
                // Both streams are closed by use, even if copying fails half-way.
                connection.inputStream.use { httpIn ->
                    FileOutputStream(tempFile.toFile()).use { fileOut ->
                        httpIn.copyTo(fileOut)
                    }
                }
            } finally {
                connection.disconnect()
            }
            true
        } catch (ex: FileNotFoundException) {
            // Has to precede the IOException handler since it is a subclass of IOException.
            // Thrown if the temporary file can't be opened; HttpURLConnection presumably
            // reports a missing remote file (HTTP 404) the same way.
            log.error("Thumbnail or temporary file not found: ${ex.message}")
            false
        } catch (ex: IOException) {
            log.error("Downloading of thumbnail file failed: ${ex.message}")
            false
        }
        if (!downloaded) {
            // Don't leave empty or partial files behind in the temp directory.
            tempFile.toFile().delete()
            return null
        }
        return tempFile.toString()
    }

    /**
     * Get preview image from Youtube
     *
     * The video id is taken from the `v` query parameter or, for short links on youtu.be,
     * from the first segment of the path.
     *
     * @param videoURL URL of video
     *
     * @return Path to local file or null if no video id was found or the download failed
     */
    fun getFromYoutube(videoURL: String): String? {
        val url = parseUrl(videoURL) ?: return null
        // query is null for URLs without a query part, e.g. https://youtu.be/<id>
        val idFromQuery = url.query
            ?.split("&")
            ?.firstOrNull { it.startsWith("v=") }
            ?.substring(2)
        val id = when {
            idFromQuery != null -> idFromQuery
            url.host.endsWith("youtu.be", ignoreCase = true) -> url.path.trim('/').substringBefore('/')
            else -> null
        }
        return if (id.isNullOrEmpty()) {
            null
        } else {
            get("https://img.youtube.com/vi/$id/hqdefault.jpg")
        }
    }

    /**
     * Get preview image from Vimeo
     *
     * The last non-empty segment of the URL's path is used as id of the image.
     *
     * @param videoURL URL of video
     * @param width Width of preview image
     *
     * @return Path to local file or null if no id was found or the download failed
     */
    fun getFromVimeo(videoURL: String, width: Int): String? {
        val url = parseUrl(videoURL) ?: return null
        // A trailing slash must not result in an empty id.
        val id = url.path.trimEnd('/').substringAfterLast('/')
        return if (id.isEmpty()) {
            null
        } else {
            get("https://i.vimeocdn.com/vimeo/${id}_$width.jpg")
        }
    }

    /**
     * Put file on sFTP server and delete local copy
     *
     * The local copy is deleted whether or not the upload succeeded. A failing deletion is
     * logged, but doesn't turn a successful upload into a failure.
     *
     * @param sourcePath Path to local temp file
     * @param destPath Path to file about to be created on sFTP server
     *
     * @return Path on sFTP server or null if the upload failed
     */
    fun moveFileToSFTP(sourcePath: String, destPath: String): String? {
        val uploaded = try {
            sftpClient.put(sourcePath, destPath)
            true
        } catch (ex: IOException) {
            log.warn("Moving thumbnail file to sFTP server failed: ${ex.message}")
            false
        }
        try {
            Files.deleteIfExists(Paths.get(sourcePath))
        } catch (ex: IOException) {
            log.warn("Deleting local thumbnail file $sourcePath failed: ${ex.message}")
        }
        return if (uploaded) destPath else null
    }
}
src/main/kotlin/Report.kt
View file @
a098d053
...
...
@@ -53,5 +53,4 @@ data class Report(
result
=
31
*
result
+
step
.
hashCode
()
return
result
}
}
src/main/kotlin/Service.kt
View file @
a098d053
...
...
@@ -17,10 +17,10 @@
*/
package
org.memobase
import
kotlin.system.exitProcess
import
org.apache.kafka.streams.KafkaStreams
import
org.apache.logging.log4j.LogManager
import
org.memobase.settings.SettingsLoader
import
kotlin.system.exitProcess
class
Service
(
file
:
String
=
"app.yml"
)
{
private
val
log
=
LogManager
.
getLogger
(
"MediaLinkerService"
)
...
...
src/main/kotlin/reports/ReportMessages.kt
View file @
a098d053
...
...
@@ -17,4 +17,4 @@ object ReportMessages {
/**
 * Builds the report message for a record lacking an identifier with rico:type 'original'.
 *
 * @param key key of the affected record
 *
 * @return the message containing the key
 */
fun noOriginalIdentifier(key: String): String =
    "The record $key does not contain a identifier with rico:type 'original'!"
}
\ No newline at end of file
}
src/main/kotlin/reports/ReportStatus.kt
View file @
a098d053
...
...
@@ -3,4 +3,4 @@ package org.memobase.reports
/**
 * String constants for the status of a report: "SUCCESS" and "FAILURE".
 */
object
ReportStatus
{
const
val
success
=
"SUCCESS"
const
val
failure
=
"FAILURE"
}
\ No newline at end of file
}
src/test/kotlin/TestKafkaTopology.kt
View file @
a098d053
...
...
@@ -18,6 +18,12 @@
package
org.memobase
import
com.beust.klaxon.Klaxon
import
java.io.File
import
java.io.FileInputStream
import
java.io.FileOutputStream
import
java.nio.charset.Charset
import
java.nio.file.Paths
import
java.util.stream.Stream
import
org.apache.jena.rdf.model.ModelFactory
import
org.apache.jena.riot.Lang
import
org.apache.jena.riot.RDFDataMgr
...
...
@@ -28,7 +34,6 @@ import org.apache.kafka.common.serialization.StringDeserializer
import
org.apache.kafka.common.serialization.StringSerializer
import
org.apache.kafka.streams.TopologyTestDriver
import
org.apache.kafka.streams.test.ConsumerRecordFactory
import
org.apache.logging.log4j.LogManager
import
org.assertj.core.api.Assertions.assertThat
import
org.junit.jupiter.api.TestInstance
import
org.junit.jupiter.api.assertAll
...
...
@@ -36,12 +41,6 @@ import org.junit.jupiter.params.ParameterizedTest
import
org.junit.jupiter.params.provider.MethodSource
import
org.memobase.rdf.NS
import
org.memobase.testing.EmbeddedSftpServer
import
java.io.File
import
java.io.FileInputStream
import
java.io.FileOutputStream
import
java.nio.charset.Charset
import
java.nio.file.Paths
import
java.util.stream.Stream
@TestInstance
(
TestInstance
.
Lifecycle
.
PER_CLASS
)
class
TestKafkaTopology
{
...
...
@@ -143,7 +142,6 @@ class TestKafkaTopology {
val
data
=
reportedRecord
.
value
()
val
report
=
Klaxon
().
parse
<
Report
>(
data
)
assertAll
(
""
,
{
...
...
@@ -176,5 +174,4 @@ class TestKafkaTopology {
""
)
)
}
\ No newline at end of file
}
src/test/resources/data/turtle-output1.ttl
View file @
a098d053
@prefix
schema:
<http://schema.org/>
.
@prefix
internal:
<http://memobase.ch/internal/>
.
@prefix
owl:
<http://www.w3.org/2002/07/owl#>
.
@prefix
mbrs:
<https://memobase.ch/recordSet/>
.
@prefix
owl:
<http://www.w3.org/2002/07/owl#>
.
@prefix
wdt:
<http://www.wikidata.org/prop/direct/>
.
@prefix
mbcb:
<https://memobase.ch/institution/>
.
@prefix
mbpo:
<https://memobase.ch/physical/>
.
@prefix
mbcb:
<https://memobase.ch/institution/>
.
@prefix
xsd:
<http://www.w3.org/2001/XMLSchema#>
.
@prefix
skos:
<http://www.w3.org/2004/02/skos/core#>
.
@prefix
rdfs:
<http://www.w3.org/2000/01/rdf-schema#>
.
@prefix
wd:
<http://www.wikidata.org/entity/>
.
@prefix
wdtn:
<http://www.wikidata.org/prop/direct-normalized/>
.
@prefix
rdau:
<http://rdaregistry.info/Elements/u/>
.
@prefix
mbdo:
<https://memobase.ch/digital/>
.
@prefix
rd
f:
<http://
www.w3.org/1999/02/22-rdf-syntax-ns#
>
.
@prefix
rd
au:
<http://
rdaregistry.info/Elements/u/
>
.
@prefix
fedora:
<http://fedora.info/definitions/v4/repository#>
.
@prefix
rdf:
<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
.
@prefix
rico:
<https://www.ica.org/standards/RiC/ontology#>
.
@prefix
ebucore:
<http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#>
.
@prefix
dcterms:
<http://purl.org/dc/terms/>
.
@prefix
ldp:
<http://www.w3.org/ns/ldp#>
.
@prefix
dcterms:
<http://purl.org/dc/terms/>
.
@prefix
mbr:
<https://memobase.ch/record/>
.
@prefix
foaf:
<http://xmlns.com/foaf/0.1/>
.
@prefix
dc:
<http://purl.org/dc/elements/1.1/>
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment