Unverified Commit 8cbc9c67 authored by Sebastian Schüpbach
Browse files

fix http locator check


Signed-off-by: Sebastian Schüpbach <sebastian.schuepbach@unibas.ch>
parent 0820fa20
Pipeline #17935 passed with stages in 1 minute and 54 seconds
...@@ -80,7 +80,7 @@ def _connect_to_mariadb(retries=0): ...@@ -80,7 +80,7 @@ def _connect_to_mariadb(retries=0):
def _try_fetch_from_json_object(record_json_data, def _try_fetch_from_json_object(record_json_data,
fetch_from_obj_fun, fetch_from_obj_fun,
access_status): access_status) -> dict:
if 'locator' in record_json_data and '@id' in record_json_data: if 'locator' in record_json_data and '@id' in record_json_data:
return fetch_from_obj_fun(record_json_data, access_status) return fetch_from_obj_fun(record_json_data, access_status)
elif '@id' in record_json_data: elif '@id' in record_json_data:
...@@ -88,10 +88,10 @@ def _try_fetch_from_json_object(record_json_data, ...@@ -88,10 +88,10 @@ def _try_fetch_from_json_object(record_json_data,
' does not have a locator-property.') ' does not have a locator-property.')
else: else:
logging.warning('Record without @id-property detected!') logging.warning('Record without @id-property detected!')
return None return dict()
def _get_values_from_thumbnail_object(msg, _access_status): def _get_values_from_thumbnail_object(msg, _access_status) -> dict:
return_values = {'mimetype': 'image/jp2', 'type': 'image', 'access': 'public', 'proto': 'file', return_values = {'mimetype': 'image/jp2', 'type': 'image', 'access': 'public', 'proto': 'file',
'sig': '{}-poster'.format(msg['@id'].split('/')[-2]), 'sig': '{}-poster'.format(msg['@id'].split('/')[-2]),
'uri': 'file:///data/{}-poster.jp2'.format(msg['@id'].split('/')[-2])} 'uri': 'file:///data/{}-poster.jp2'.format(msg['@id'].split('/')[-2])}
...@@ -104,10 +104,10 @@ def _get_values_from_thumbnail_object(msg, _access_status): ...@@ -104,10 +104,10 @@ def _get_values_from_thumbnail_object(msg, _access_status):
return return_values return return_values
def _get_values_from_digital_object(msg, access_status): def _get_values_from_digital_object(msg, access_status) -> dict:
if 'isDistributedOn' not in msg: if 'isDistributedOn' not in msg:
logging.warning("No isDistributedOn property found in object") logging.warning("No isDistributedOn property found in object")
return None return dict()
file_extension = '' file_extension = ''
return_values = {'access': access_status, 'sig': msg['@id'].split('/')[-1]} return_values = {'access': access_status, 'sig': msg['@id'].split('/')[-1]}
if 'height' in msg: if 'height' in msg:
...@@ -144,18 +144,18 @@ def _get_values_from_digital_object(msg, access_status): ...@@ -144,18 +144,18 @@ def _get_values_from_digital_object(msg, access_status):
return return_values return return_values
def _is_remote_file(msg): def _is_remote_file(msg) -> bool:
return 'locator' in msg and not \ return 'locator' in msg and not \
msg['locator'].startswith('https://memobase.ch/') msg['locator'].startswith('https://memobase.ch/')
def _is_directly_fetchable(digital_object_resource): def _is_directly_fetchable(digital_object_resource) -> bool:
return digital_object_resource['isDistributedOn'] == 'audio' or \ return digital_object_resource['isDistributedOn'] == 'audio' or \
digital_object_resource['isDistributedOn'] == 'image' or \ digital_object_resource['isDistributedOn'] == 'image' or \
digital_object_resource['isDistributedOn'] == 'video' digital_object_resource['isDistributedOn'] == 'video'
def _get_access_status(graph, record_id): def _get_access_status(graph, record_id) -> str:
for resource in graph: for resource in graph:
if 'type' in resource and resource['type'] == 'access' and \ if 'type' in resource and resource['type'] == 'access' and \
'regulates' in resource and \ 'regulates' in resource and \
...@@ -176,14 +176,14 @@ def _get_access_status(graph, record_id): ...@@ -176,14 +176,14 @@ def _get_access_status(graph, record_id):
return 'unavailable' return 'unavailable'
def _get_record_id(graph): def _get_record_id(graph) -> str:
for resource in graph: for resource in graph:
if '@type' in resource and resource['@type'] == \ if '@type' in resource and resource['@type'] == \
'https://www.ica.org/standards/RiC/ontology#Record': 'https://www.ica.org/standards/RiC/ontology#Record':
return resource['@id'] if '@id' in resource else None return resource['@id'] if '@id' in resource else None
def _create_sql_stmt(table_name, record, fields): def _create_sql_stmt(table_name, record, fields) -> str:
db_fields = [dbField for dbField in fields db_fields = [dbField for dbField in fields
if dbField in record and record[dbField] is not None] if dbField in record and record[dbField] is not None]
db_values = ','.join([str(record[db_field]) db_values = ','.join([str(record[db_field])
...@@ -208,15 +208,15 @@ def _create_metadata_entry(record, mariadb_cursor): ...@@ -208,15 +208,15 @@ def _create_metadata_entry(record, mariadb_cursor):
mariadb_cursor.execute(sql_stmt) mariadb_cursor.execute(sql_stmt)
def _has_audio_snippet(record): def _has_audio_snippet(record) -> bool:
return record['type'] == 'audio' and \ return record['type'] == 'audio' and \
'uri' in record and \ 'uri' in record and \
record['uri'].startswith('file://') record['uri'].startswith('file://')
# TODO: Eventually remove # TODO: Eventually remove
def _has_http_locator(digital_object): def _has_http_locator(digital_object) -> bool:
'locator' in digital_object and digital_object['locator'].startswith('http') return 'locator' in digital_object and digital_object['locator'].startswith('http')
def _create_audio_snippet_entry(record, mariadb_cursor): def _create_audio_snippet_entry(record, mariadb_cursor):
...@@ -304,11 +304,6 @@ class MediametadataToDB: ...@@ -304,11 +304,6 @@ class MediametadataToDB:
_try_fetch_from_json_object(record_resource, _try_fetch_from_json_object(record_resource,
_get_values_from_thumbnail_object, _get_values_from_thumbnail_object,
access_status) access_status)
if not enriched_data:
reporter.send_message(record_id,
"FATAL",
"Could not process thumbnail object")
else:
record_values_for_db.append(enriched_data) record_values_for_db.append(enriched_data)
if not enrichable: if not enrichable:
reporter.send_message(record_id, reporter.send_message(record_id,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment