memoriav / Memobase 2020 / services / postprocessing / Media Metadata Indexer · Commits

Unverified commit f1839a23, authored Nov 06, 2020 by Sebastian Schüpbach
retry if connection to kafka / mariadb can't be established
Signed-off-by: Sebastian Schüpbach <sebastian.schuepbach@unibas.ch>
parent 6214a51a

3 changed files
k8s-manifests/test/kubernetes-deployment.yml

@@ -18,7 +18,14 @@ spec:
     spec:
       containers:
       - name: mediametadatatodb-container
-        image: cr.gitlab.switch.ch/memoriav/memobase-2020/services/postprocessing/mediametadatatodb:latest
+        image: cr.gitlab.switch.ch/memoriav/memobase-2020/services/postprocessing/mediametadatatodb:access
+        env:
+        - name: KAFKA_GROUP_ID
+          value: "medienserverMetadataService"
+        - name: KAFKA_CONNECTION_RETRIES
+          value: "3"
+        - name: MARIADB_CONNECTION_RETRIES
+          value: "3"
         envFrom:
         - configMapRef:
             name: "prod-kafka-bootstrap-servers"
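The two new *_CONNECTION_RETRIES variables are consumed by the connection helpers added to MediametadataToDB.py below. A minimal sketch of reading such a setting defensively (the fallback default is an assumption for illustration, not part of this commit):

    import os

    def connection_retries(var_name, default=3):
        # Return the configured retry count, falling back if the variable
        # is missing or not an integer (hypothetical helper; the service
        # itself reads the variable directly).
        try:
            return int(os.environ[var_name])
        except (KeyError, ValueError):
            return default

    max_kafka_retries = connection_retries('KAFKA_CONNECTION_RETRIES')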
mediametadatatodb_app/main.py

 from mediametadatatodb_app.resources.MediametadataToDB import MediametadataToDB

 if __name__ == "__main__":
-    m = MediametadataToDB()
-    m.run()
+    MediametadataToDB.run()
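The entry point no longer instantiates the class: run() becomes a @staticmethod in MediametadataToDB.py (see the last hunks of that file below), so it is now called directly on the class.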
mediametadatatodb_app/resources/MediametadataToDB.py

@@ -2,11 +2,63 @@ import json
 import logging
 import numbers
 import os
+import time

 import mysql.connector as mariadb
 # noinspection PyPackageRequirements
 from kafka import KafkaConsumer
 # noinspection PyPackageRequirements
 from kafka.errors import KafkaError
 from kubernetes import config
+from kubernetes.config.config_exception import ConfigException as K8sConfigException
+
+
+def _connect_to_kafka(retries=0):
+    try:
+        consumer = KafkaConsumer(
+            'fedora-output-json-records',
+            value_deserializer=lambda m: json.loads(m.decode('utf8')),
+            bootstrap_servers=os.environ['KAFKA_BOOTSTRAP_SERVERS'],
+            auto_offset_reset='earliest',
+            enable_auto_commit=False,
+            group_id=os.environ['KAFKA_GROUP_ID'],
+            consumer_timeout_ms=30000)
+        return consumer
+    except KafkaError as ex:
+        status = 'KafkaError: ' + str(ex)
+        logging.error(status)
+        if retries < int(os.environ['KAFKA_CONNECTION_RETRIES']):
+            time.sleep(30 * (retries + 1))
+            _connect_to_kafka(retries + 1)
+        exit(1)
+    except Exception as ex:
+        status = 'Exception: ' + str(ex)
+        logging.error(status)
+        if retries < int(os.environ['KAFKA_CONNECTION_RETRIES']):
+            time.sleep(30 * (retries + 1))
+            _connect_to_kafka(retries + 1)
+        exit(1)
+
+
+def _connect_to_mariadb(retries=0):
+    password = os.environ["mediaserver"].split(':')[1].split('@tcp(')[0]
+    try:
+        mariadb_connection = mariadb.connect(
+            user='medienserver',
+            password=password,
+            host='mb-db1.memobase.unibas.ch',
+            port=3306,
+            database='medienserver')
+        mariadb_connection.autocommit = False
+        mariadb_cursor = mariadb_connection.cursor()
+        mariadb_cursor.execute("USE medienserver")
+        return mariadb_connection, mariadb_cursor
+    except Exception as ex:
+        status = 'Exception: ' + str(ex)
+        logging.error(status)
+        if retries < int(os.environ['MARIADB_CONNECTION_RETRIES']):
+            time.sleep(30 * (retries + 1))
+            _connect_to_kafka(retries + 1)
+        exit(1)
+
+
 def _try_fetch_from_json_object(record_json_data,
 ...
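The two helpers retry with a linear back-off: after a failed attempt they sleep 30 * (retries + 1) seconds (30 s, 60 s, 90 s, ...), bounded by KAFKA_CONNECTION_RETRIES / MARIADB_CONNECTION_RETRIES, and fall back to exit(1) when no connection can be established. For reference, the same pattern can also be written as a generic, iterative wrapper; this is only an illustrative sketch under those assumptions, not code from this repository:

    import logging
    import time

    def retry_connect(connect_fn, max_retries, base_delay=30):
        # Call connect_fn until it succeeds or the retry budget is used up.
        # Sleeps base_delay, 2 * base_delay, ... seconds between attempts,
        # mirroring the linear back-off above (illustrative sketch only).
        for attempt in range(max_retries + 1):
            try:
                return connect_fn()
            except Exception as ex:
                logging.error('connection attempt %d failed: %s', attempt + 1, ex)
                if attempt < max_retries:
                    time.sleep(base_delay * (attempt + 1))
        raise SystemExit(1)

    # e.g. consumer = retry_connect(lambda: KafkaConsumer('some-topic'), max_retries=3)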
@@ -45,50 +97,50 @@ def _get_values_from_thumbnail_object(msg, _access_status):
 def _get_values_from_digital_object(msg, access_status):
     sig = obj_type = None
-    fileExtension = ''
-    returnValues = {
+    file_extension = ''
+    return_values = {
         'access': access_status}
     if '@id' in msg:
         v1 = msg['@id']
         v2 = v1.split('/')
         sig = v2[-1]
-        returnValues['sig'] = sig
+        return_values['sig'] = sig
     if 'hasMimeType' in msg:
         mimetype = msg['hasMimeType']
-        returnValues['mimetype'] = mimetype
+        return_values['mimetype'] = mimetype
         # create value for field 'type' form mimetype:
         obj_type = mimetype.split('/')[0]
     if 'locator' in msg and 'https://memobase.ch/' not in msg['locator']:
         uri = msg['locator']
     else:
         if obj_type == 'image':
-            fileExtension = 'jp2'
+            file_extension = 'jp2'
         if obj_type == 'audio':
-            fileExtension = 'mp4'
+            file_extension = 'mp4'
         if obj_type == 'video':
-            fileExtension = 'mp4'
-        uri = os.environ['URI_BASE'] + sig + '.' + fileExtension
-    returnValues['uri'] = uri
+            file_extension = 'mp4'
+        uri = os.environ['URI_BASE'] + sig + '.' + file_extension
+    return_values['uri'] = uri
     if 'height' in msg:
         height = msg['height']
-        returnValues['height'] = height
+        return_values['height'] = height
     if 'width' in msg:
         width = msg['width']
-        returnValues['width'] = width
+        return_values['width'] = width
     if 'duration' in msg:
         duration = msg['duration']
-        returnValues['duration'] = duration
+        return_values['duration'] = duration
     # if uri uses a play to show content, use a special 'type':
     if 'isDistributedOn' in msg:
         if msg['isDistributedOn'] == 'file' and uri.startswith('http'):
             if access_status == 'public':
-                returnValues['proto'] = 'redirect'
+                return_values['proto'] = 'redirect'
             else:
-                returnValues['proto'] = 'proxy'
+                return_values['proto'] = 'proxy'
         else:
-            returnValues['type'] = msg['isDistributedOn']
-    return returnValues
+            return_values['type'] = msg['isDistributedOn']
+    return return_values


 def _get_access_status(graph):
 ...
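To illustrate what _get_values_from_digital_object collects (the record identifier, locator and URI_BASE value below are invented for the example):

    msg = {'@id': 'https://memobase.ch/record/xyz-1',
           'hasMimeType': 'audio/mpeg',
           'locator': 'https://memobase.ch/object/xyz-1',
           'duration': '185.0',
           'isDistributedOn': 'file'}
    # With access_status == 'public' and URI_BASE == 'https://media.example.org/',
    # the locator is ignored (it points at memobase.ch), obj_type becomes 'audio',
    # and the function returns roughly:
    # {'access': 'public', 'sig': 'xyz-1', 'mimetype': 'audio/mpeg',
    #  'uri': 'https://media.example.org/xyz-1.mp4', 'duration': '185.0',
    #  'proto': 'redirect'}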
@@ -112,20 +164,21 @@ def _create_sql_stmt(table_name, record, fields):
                            else "'{}'".format(record[db_field])
                            for db_field in db_fields])
     db_fields = ','.join(db_fields)
     # noinspection SqlNoDataSourceInspection
     return 'INSERT IGNORE INTO {} ({}) VALUES ({})'.format(table_name, db_fields, db_values)


-def _create_entities_entry(record, mariadbCursor):
+def _create_entities_entry(record, mariadb_cursor):
     fields = ['sig', 'uri', 'access', 'proto']
-    sqlStmt = _create_sql_stmt('entities', record, fields)
-    mariadbCursor.execute(sqlStmt)
+    sql_stmt = _create_sql_stmt('entities', record, fields)
+    mariadb_cursor.execute(sql_stmt)


-def _create_metadata_entry(record, mariadbCursor):
+def _create_metadata_entry(record, mariadb_cursor):
     fields = ['sig', 'mimetype', 'height', 'width', 'duration', 'type']
-    sqlStmt = _create_sql_stmt('metadata', record, fields)
-    mariadbCursor.execute(sqlStmt)
+    sql_stmt = _create_sql_stmt('metadata', record, fields)
+    mariadb_cursor.execute(sql_stmt)


 def _has_audio_snippet(record):
 ...
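For a hypothetical entities record (all values invented for illustration), and assuming db_fields ends up as the listed fields, the generated statement would look roughly like this:

    record = {'sig': 'xyz-1', 'uri': 'https://media.example.org/xyz-1.mp4',
              'access': 'public', 'proto': 'redirect'}
    # _create_sql_stmt('entities', record, ['sig', 'uri', 'access', 'proto'])
    # should produce something along the lines of (numeric values stay unquoted):
    # INSERT IGNORE INTO entities (sig,uri,access,proto)
    #     VALUES ('xyz-1','https://media.example.org/xyz-1.mp4','public','redirect')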
@@ -134,34 +187,35 @@ def _has_audio_snippet(record):
            record['uri'].startswith('file://')


-def _create_audio_snippet_entry(record, mariadbCursor):
+def _create_audio_snippet_entry(record, mariadb_cursor):
     snippet_record = record.copy()
     snippet_record['sig'] = snippet_record['sig'] + '-intro'
     snippet_record['duration'] = \
         30.0 if float(snippet_record['duration']) >= 30.0 \
         else float(snippet_record['duration'])
     snippet_record['mimetype'] = 'audio/mpeg'
     snippet_record['uri'] = \
         '.'.join(snippet_record['uri'].split('.')[0:-1]) + '-intro.mp3'
-    _create_entities_entry(snippet_record, mariadbCursor)
-    _create_metadata_entry(snippet_record, mariadbCursor)
+    _create_entities_entry(snippet_record, mariadb_cursor)
+    _create_metadata_entry(snippet_record, mariadb_cursor)


-def _write_values_in_db(mariadbCursor, recordValuesForDB):
+def _write_values_in_db(mariadb_cursor, record_values_for_db):
     try:
-        for record in recordValuesForDB:
-            _create_entities_entry(record, mariadbCursor)
-            _create_metadata_entry(record, mariadbCursor)
+        for record in record_values_for_db:
+            _create_entities_entry(record, mariadb_cursor)
+            _create_metadata_entry(record, mariadb_cursor)
             if _has_audio_snippet(record):
-                _create_audio_snippet_entry(record, mariadbCursor)
+                _create_audio_snippet_entry(record, mariadb_cursor)
     except Exception as ex:
         status = 'Exception: ' + str(ex)
         logging.error(status)


-class MediametadataToDB():
+class MediametadataToDB:
     # Todo write/correct comment for swagger
-    def run(self):
+    @staticmethod
+    def run():
         """
         Import media metadata to mariaDB
         This service should not return anything but run forever.
 ...
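_create_audio_snippet_entry derives a second record for an intro snippet: with a hypothetical record {'sig': 'xyz-1', 'uri': 'file:///data/xyz-1.mp4', 'duration': '185.0', ...}, the copy gets sig 'xyz-1-intro', duration 30.0 (capped at 30 seconds), mimetype 'audio/mpeg' and uri 'file:///data/xyz-1-intro.mp3', and is written through the same entities/metadata helpers.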
@@ -178,71 +232,41 @@ class MediametadataToDB():
             example: there was an exception
         """
         # connect to kafka:
-        try:
-            consumer = KafkaConsumer(
-                'fedora-output-json-records',
-                value_deserializer=lambda m: json.loads(m.decode('utf8')),
-                bootstrap_servers=os.environ['KAFKA_BOOTSTRAP_SERVERS'],
-                auto_offset_reset='earliest',
-                enable_auto_commit=False,
-                group_id='medienserverMetadataService14',
-                consumer_timeout_ms=30000)
-        except KafkaError as ex:
-            status = 'KafkaError: ' + str(ex)
-            logging.error(status)
-        except Exception as ex:
-            status = 'Exception: ' + str(ex)
-            logging.error(status)
-        # connect to mariadb:
-        password = os.environ["mediaserver"].split(':')[1].split('@tcp(')[0]
-        try:
-            mariadbConnection = mariadb.connect(
-                user='medienserver',
-                password=password,
-                host='mb-db1.memobase.unibas.ch',
-                port=3306,
-                database='medienserver')
-            mariadbConnection.autocommit = False
-            mariadbCursor = mariadbConnection.cursor()
-            mariadbCursor.execute("USE medienserver")
-        except Exception as ex:
-            status = 'Exception: ' + str(ex)
-            logging.error(status)
+        consumer = _connect_to_kafka()
+        mariadb_connection, mariadb_cursor = _connect_to_mariadb()

         # process messages:
-        recordValuesForDB = []
+        record_values_for_db = []
         try:
             # read messages from kafka
             while True:
                 consumer.poll(max_records=25)
                 for recordsJson in consumer:
-                    recordsJsonData = recordsJson.value['@graph']
-                    access_status = _get_access_status(recordsJsonData)
+                    records_json_data = recordsJson.value['@graph']
+                    access_status = _get_access_status(records_json_data)
                     if access_status == 'public' or access_status == 'closed':
-                        for recordJsonData in recordsJsonData:
+                        for recordJsonData in records_json_data:
                             if 'type' in recordJsonData and \
                                     recordJsonData['type'] == 'digitalObject':
-                                _try_fetch_from_json_object(recordJsonData, recordValuesForDB,
+                                _try_fetch_from_json_object(recordJsonData, record_values_for_db,
                                                             _get_values_from_digital_object,
                                                             access_status)
                             if 'type' in recordJsonData and \
                                     recordJsonData['type'] == 'thumbnail':
-                                _try_fetch_from_json_object(recordJsonData, recordValuesForDB,
+                                _try_fetch_from_json_object(recordJsonData, record_values_for_db,
                                                             _get_values_from_thumbnail_object,
                                                             access_status)
                     # if readMessageCounter >= 100:
                     #     break
                     # to consider: we could skip this next block and rely on max_records instead
-                    if len(recordValuesForDB) >= 25:
-                        _write_values_in_db(mariadbCursor, recordValuesForDB)
-                        mariadbConnection.commit()
-                        recordValuesForDB = []
+                    if len(record_values_for_db) >= 25:
+                        _write_values_in_db(mariadb_cursor, record_values_for_db)
+                        mariadb_connection.commit()
+                        record_values_for_db = []
                     consumer.commit()  # <-- uncomment this for production!
                 # arriving here means there are no new messages to poll from
-                _write_values_in_db(mariadbCursor, recordValuesForDB)
-                mariadbConnection.commit()
-                recordValuesForDB = []
+                _write_values_in_db(mariadb_cursor, record_values_for_db)
+                mariadb_connection.commit()
+                record_values_for_db = []
                 consumer.commit()  # <-- uncomment this for production!
                 # if readMessageCounter >= 100:
                 #     break
 ...
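With this hunk, records are buffered in record_values_for_db and flushed to MariaDB in batches of 25, committing the MariaDB transaction and the Kafka offsets after each flush; whatever remains in the buffer is written once the consumer stops yielding messages (consumer_timeout_ms=30000).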
@@ -264,10 +288,10 @@ class MediametadataToDB():
         try:
             # to be used when inside a kubernetes cluster
             config.load_incluster_config()
-        except BaseException:
+        except K8sConfigException:
             try:
                 # use .kube directory
                 # for local development
                 config.load_kube_config()
-            except BaseException:
+            except K8sConfigException:
                 logging.error("No kubernetes cluster defined")
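K8sConfigException is the kubernetes client's ConfigException imported at the top of the file, so only configuration-loading failures now trigger the fallback to the local kubeconfig; BaseException would also have swallowed unrelated errors such as SystemExit or KeyboardInterrupt.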