Commit 33ef8808 authored by Thomas Bernhart
Browse files

Handle case when no datastream with ID TRANSFORMED_METADATA_0 exists

parent bdae2e48
import logging
import os
import sys
import traceback
from os import listdir, makedirs, path, walk
from shutil import copy2
from ch.memobase.media import MediaFileSearcher
from ch.memobase.records_sets import RecordSetIdMapper
from ch.memobase.foxml import FoxmlReader
from ch.memobase.foxml import FoxmlParsingError
def _copy_file(source_file, destination_directory, destination_filename):
......@@ -45,9 +43,13 @@ def foxml_export(objectstore_path, output_path, recordsets_csv_file):
for r, d, f in walk(objectstore_path): # r=root, d=directories, f = files
for file in f:
foxml_path = path.join(r, file)
logger.debug("Parsing FOXML file: '" + foxml_path + "'")
try:
logger.debug("Parsing FOXML file: '" + foxml_path + "'")
foxml_reader = FoxmlReader(foxml_path)
except FoxmlParsingError as parsing_error:
logging.warning(parsing_error)
else:
old_record_set_id = foxml_reader.get_recordset_id()
if old_record_set_id is not None:
logger.debug("FOXML file " + foxml_path + " belongs to record set " + old_record_set_id)
......@@ -66,8 +68,6 @@ def foxml_export(objectstore_path, output_path, recordsets_csv_file):
else:
logger.warning("Ignored FOXML file '" + foxml_path +
"': No recordset ID found in FOXML")
except: # catch *all* exceptions
traceback.print_exc(limit=1, file=sys.stdout)
logger.info("Finished FOXML export")
......
......@@ -72,6 +72,10 @@ def _calculate_data_stream_path(datastream_id):
return path.join(hash_object.hexdigest()[0:2], quoted_full_id)
class FoxmlParsingError(Exception):
    """Raised when a FOXML file cannot be read as expected.

    ``FoxmlReader.__init__`` raises this when the parsed file has no
    datastream with ID 'TRANSFORMED_METADATA_0'.
    """
class FoxmlReader:
NAMESPACES = {
'foxml': 'info:fedora/fedora-system:def/foxml#',
......@@ -83,9 +87,13 @@ class FoxmlReader:
def __init__(self, file):
    """Parse a FOXML file and look up the datastream elements the reader uses.

    :param file: the FOXML file to read; handed to ``ElementTree.parse``.
    :raises FoxmlParsingError: if the file has no datastream with ID
        'TRANSFORMED_METADATA_0'.
    """
    root_element = ElementTree.parse(file).getroot()
    metadata_datastream_element = _get_metadata_datastream_element(root_element)
    # Test the freshly looked-up local, not the instance attribute: the
    # attribute does not exist yet at this point, so checking it would fail
    # with AttributeError instead of reporting the missing datastream.
    if metadata_datastream_element is None:
        # str.format keeps the message identical for string paths and also
        # works when `file` is not a str (plain concatenation would raise
        # TypeError and mask the real problem).
        raise FoxmlParsingError(
            "FOXML file '{}' has no datastream with ID 'TRANSFORMED_METADATA_0'".format(file)
        )
    self.metadata_datastream_element = metadata_datastream_element
    self.thumbnail_datastream_element = _get_thumbnail_datastream_element(root_element)
    self.accesscopy_datastream_element = _get_accesscopy_datastream_element(root_element)
def get_recordset_id(self):
return _get_element_text(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment