To mitigate brute-force attacks against GitLab accounts, we are moving all logins to edu-ID. Please remember to link your account with your edu-ID. After November 30, 2021, login will be possible only via edu-ID. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one by following this guide.

kind regards

This Server has been upgraded to GitLab release 14.2.6

Unverified Commit 8cbc9c67 authored by Sebastian Schüpbach's avatar Sebastian Schüpbach
Browse files

fix http locator check


Signed-off-by: Sebastian Schüpbach's avatarSebastian Schüpbach <sebastian.schuepbach@unibas.ch>
parent 0820fa20
Pipeline #17935 passed with stages
in 1 minute and 54 seconds
......@@ -80,7 +80,7 @@ def _connect_to_mariadb(retries=0):
def _try_fetch_from_json_object(record_json_data,
fetch_from_obj_fun,
access_status):
access_status) -> dict:
if 'locator' in record_json_data and '@id' in record_json_data:
return fetch_from_obj_fun(record_json_data, access_status)
elif '@id' in record_json_data:
......@@ -88,10 +88,10 @@ def _try_fetch_from_json_object(record_json_data,
' does not have a locator-property.')
else:
logging.warning('Record without @id-property detected!')
return None
return dict()
def _get_values_from_thumbnail_object(msg, _access_status):
def _get_values_from_thumbnail_object(msg, _access_status) -> dict:
return_values = {'mimetype': 'image/jp2', 'type': 'image', 'access': 'public', 'proto': 'file',
'sig': '{}-poster'.format(msg['@id'].split('/')[-2]),
'uri': 'file:///data/{}-poster.jp2'.format(msg['@id'].split('/')[-2])}
......@@ -104,10 +104,10 @@ def _get_values_from_thumbnail_object(msg, _access_status):
return return_values
def _get_values_from_digital_object(msg, access_status):
def _get_values_from_digital_object(msg, access_status) -> dict:
if 'isDistributedOn' not in msg:
logging.warning("No isDistributedOn property found in object")
return None
return dict()
file_extension = ''
return_values = {'access': access_status, 'sig': msg['@id'].split('/')[-1]}
if 'height' in msg:
......@@ -144,18 +144,18 @@ def _get_values_from_digital_object(msg, access_status):
return return_values
def _is_remote_file(msg):
def _is_remote_file(msg) -> bool:
return 'locator' in msg and not \
msg['locator'].startswith('https://memobase.ch/')
def _is_directly_fetchable(digital_object_resource):
def _is_directly_fetchable(digital_object_resource) -> bool:
return digital_object_resource['isDistributedOn'] == 'audio' or \
digital_object_resource['isDistributedOn'] == 'image' or \
digital_object_resource['isDistributedOn'] == 'video'
def _get_access_status(graph, record_id):
def _get_access_status(graph, record_id) -> str:
for resource in graph:
if 'type' in resource and resource['type'] == 'access' and \
'regulates' in resource and \
......@@ -176,14 +176,14 @@ def _get_access_status(graph, record_id):
return 'unavailable'
def _get_record_id(graph):
def _get_record_id(graph) -> str:
for resource in graph:
if '@type' in resource and resource['@type'] == \
'https://www.ica.org/standards/RiC/ontology#Record':
return resource['@id'] if '@id' in resource else None
def _create_sql_stmt(table_name, record, fields):
def _create_sql_stmt(table_name, record, fields) -> str:
db_fields = [dbField for dbField in fields
if dbField in record and record[dbField] is not None]
db_values = ','.join([str(record[db_field])
......@@ -208,15 +208,15 @@ def _create_metadata_entry(record, mariadb_cursor):
mariadb_cursor.execute(sql_stmt)
def _has_audio_snippet(record):
def _has_audio_snippet(record) -> bool:
return record['type'] == 'audio' and \
'uri' in record and \
record['uri'].startswith('file://')
# TODO: Eventually remove
def _has_http_locator(digital_object):
'locator' in digital_object and digital_object['locator'].startswith('http')
def _has_http_locator(digital_object) -> bool:
return 'locator' in digital_object and digital_object['locator'].startswith('http')
def _create_audio_snippet_entry(record, mariadb_cursor):
......@@ -304,12 +304,7 @@ class MediametadataToDB:
_try_fetch_from_json_object(record_resource,
_get_values_from_thumbnail_object,
access_status)
if not enriched_data:
reporter.send_message(record_id,
"FATAL",
"Could not process thumbnail object")
else:
record_values_for_db.append(enriched_data)
record_values_for_db.append(enriched_data)
if not enrichable:
reporter.send_message(record_id,
"IGNORE",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment