Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
services
postprocessing
Media Metadata Indexer
Commits
8cbc9c67
Unverified
Commit
8cbc9c67
authored
Nov 27, 2020
by
Sebastian Schüpbach
Browse files
fix http locator check
Signed-off-by:
Sebastian Schüpbach
<
sebastian.schuepbach@unibas.ch
>
parent
0820fa20
Pipeline
#17935
passed with stages
in 1 minute and 54 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
mediametadatatodb_app/resources/MediametadataToDB.py
View file @
8cbc9c67
...
@@ -80,7 +80,7 @@ def _connect_to_mariadb(retries=0):
...
@@ -80,7 +80,7 @@ def _connect_to_mariadb(retries=0):
def
_try_fetch_from_json_object
(
record_json_data
,
def
_try_fetch_from_json_object
(
record_json_data
,
fetch_from_obj_fun
,
fetch_from_obj_fun
,
access_status
):
access_status
)
->
dict
:
if
'locator'
in
record_json_data
and
'@id'
in
record_json_data
:
if
'locator'
in
record_json_data
and
'@id'
in
record_json_data
:
return
fetch_from_obj_fun
(
record_json_data
,
access_status
)
return
fetch_from_obj_fun
(
record_json_data
,
access_status
)
elif
'@id'
in
record_json_data
:
elif
'@id'
in
record_json_data
:
...
@@ -88,10 +88,10 @@ def _try_fetch_from_json_object(record_json_data,
...
@@ -88,10 +88,10 @@ def _try_fetch_from_json_object(record_json_data,
' does not have a locator-property.'
)
' does not have a locator-property.'
)
else
:
else
:
logging
.
warning
(
'Record without @id-property detected!'
)
logging
.
warning
(
'Record without @id-property detected!'
)
return
None
return
dict
()
def
_get_values_from_thumbnail_object
(
msg
,
_access_status
):
def
_get_values_from_thumbnail_object
(
msg
,
_access_status
)
->
dict
:
return_values
=
{
'mimetype'
:
'image/jp2'
,
'type'
:
'image'
,
'access'
:
'public'
,
'proto'
:
'file'
,
return_values
=
{
'mimetype'
:
'image/jp2'
,
'type'
:
'image'
,
'access'
:
'public'
,
'proto'
:
'file'
,
'sig'
:
'{}-poster'
.
format
(
msg
[
'@id'
].
split
(
'/'
)[
-
2
]),
'sig'
:
'{}-poster'
.
format
(
msg
[
'@id'
].
split
(
'/'
)[
-
2
]),
'uri'
:
'file:///data/{}-poster.jp2'
.
format
(
msg
[
'@id'
].
split
(
'/'
)[
-
2
])}
'uri'
:
'file:///data/{}-poster.jp2'
.
format
(
msg
[
'@id'
].
split
(
'/'
)[
-
2
])}
...
@@ -104,10 +104,10 @@ def _get_values_from_thumbnail_object(msg, _access_status):
...
@@ -104,10 +104,10 @@ def _get_values_from_thumbnail_object(msg, _access_status):
return
return_values
return
return_values
def
_get_values_from_digital_object
(
msg
,
access_status
):
def
_get_values_from_digital_object
(
msg
,
access_status
)
->
dict
:
if
'isDistributedOn'
not
in
msg
:
if
'isDistributedOn'
not
in
msg
:
logging
.
warning
(
"No isDistributedOn property found in object"
)
logging
.
warning
(
"No isDistributedOn property found in object"
)
return
None
return
dict
()
file_extension
=
''
file_extension
=
''
return_values
=
{
'access'
:
access_status
,
'sig'
:
msg
[
'@id'
].
split
(
'/'
)[
-
1
]}
return_values
=
{
'access'
:
access_status
,
'sig'
:
msg
[
'@id'
].
split
(
'/'
)[
-
1
]}
if
'height'
in
msg
:
if
'height'
in
msg
:
...
@@ -144,18 +144,18 @@ def _get_values_from_digital_object(msg, access_status):
...
@@ -144,18 +144,18 @@ def _get_values_from_digital_object(msg, access_status):
return
return_values
return
return_values
def
_is_remote_file
(
msg
):
def
_is_remote_file
(
msg
)
->
bool
:
return
'locator'
in
msg
and
not
\
return
'locator'
in
msg
and
not
\
msg
[
'locator'
].
startswith
(
'https://memobase.ch/'
)
msg
[
'locator'
].
startswith
(
'https://memobase.ch/'
)
def
_is_directly_fetchable
(
digital_object_resource
):
def
_is_directly_fetchable
(
digital_object_resource
)
->
bool
:
return
digital_object_resource
[
'isDistributedOn'
]
==
'audio'
or
\
return
digital_object_resource
[
'isDistributedOn'
]
==
'audio'
or
\
digital_object_resource
[
'isDistributedOn'
]
==
'image'
or
\
digital_object_resource
[
'isDistributedOn'
]
==
'image'
or
\
digital_object_resource
[
'isDistributedOn'
]
==
'video'
digital_object_resource
[
'isDistributedOn'
]
==
'video'
def
_get_access_status
(
graph
,
record_id
):
def
_get_access_status
(
graph
,
record_id
)
->
str
:
for
resource
in
graph
:
for
resource
in
graph
:
if
'type'
in
resource
and
resource
[
'type'
]
==
'access'
and
\
if
'type'
in
resource
and
resource
[
'type'
]
==
'access'
and
\
'regulates'
in
resource
and
\
'regulates'
in
resource
and
\
...
@@ -176,14 +176,14 @@ def _get_access_status(graph, record_id):
...
@@ -176,14 +176,14 @@ def _get_access_status(graph, record_id):
return
'unavailable'
return
'unavailable'
def
_get_record_id
(
graph
):
def
_get_record_id
(
graph
)
->
str
:
for
resource
in
graph
:
for
resource
in
graph
:
if
'@type'
in
resource
and
resource
[
'@type'
]
==
\
if
'@type'
in
resource
and
resource
[
'@type'
]
==
\
'https://www.ica.org/standards/RiC/ontology#Record'
:
'https://www.ica.org/standards/RiC/ontology#Record'
:
return
resource
[
'@id'
]
if
'@id'
in
resource
else
None
return
resource
[
'@id'
]
if
'@id'
in
resource
else
None
def
_create_sql_stmt
(
table_name
,
record
,
fields
):
def
_create_sql_stmt
(
table_name
,
record
,
fields
)
->
str
:
db_fields
=
[
dbField
for
dbField
in
fields
db_fields
=
[
dbField
for
dbField
in
fields
if
dbField
in
record
and
record
[
dbField
]
is
not
None
]
if
dbField
in
record
and
record
[
dbField
]
is
not
None
]
db_values
=
','
.
join
([
str
(
record
[
db_field
])
db_values
=
','
.
join
([
str
(
record
[
db_field
])
...
@@ -208,15 +208,15 @@ def _create_metadata_entry(record, mariadb_cursor):
...
@@ -208,15 +208,15 @@ def _create_metadata_entry(record, mariadb_cursor):
mariadb_cursor
.
execute
(
sql_stmt
)
mariadb_cursor
.
execute
(
sql_stmt
)
def
_has_audio_snippet
(
record
):
def
_has_audio_snippet
(
record
)
->
bool
:
return
record
[
'type'
]
==
'audio'
and
\
return
record
[
'type'
]
==
'audio'
and
\
'uri'
in
record
and
\
'uri'
in
record
and
\
record
[
'uri'
].
startswith
(
'file://'
)
record
[
'uri'
].
startswith
(
'file://'
)
# TODO: Eventually remove
# TODO: Eventually remove
def
_has_http_locator
(
digital_object
):
def
_has_http_locator
(
digital_object
)
->
bool
:
'locator'
in
digital_object
and
digital_object
[
'locator'
].
startswith
(
'http'
)
return
'locator'
in
digital_object
and
digital_object
[
'locator'
].
startswith
(
'http'
)
def
_create_audio_snippet_entry
(
record
,
mariadb_cursor
):
def
_create_audio_snippet_entry
(
record
,
mariadb_cursor
):
...
@@ -304,12 +304,7 @@ class MediametadataToDB:
...
@@ -304,12 +304,7 @@ class MediametadataToDB:
_try_fetch_from_json_object
(
record_resource
,
_try_fetch_from_json_object
(
record_resource
,
_get_values_from_thumbnail_object
,
_get_values_from_thumbnail_object
,
access_status
)
access_status
)
if
not
enriched_data
:
record_values_for_db
.
append
(
enriched_data
)
reporter
.
send_message
(
record_id
,
"FATAL"
,
"Could not process thumbnail object"
)
else
:
record_values_for_db
.
append
(
enriched_data
)
if
not
enrichable
:
if
not
enrichable
:
reporter
.
send_message
(
record_id
,
reporter
.
send_message
(
record_id
,
"IGNORE"
,
"IGNORE"
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment