Commit bdae2e48 authored by Thomas Bernhart
Browse files

Remove obsolete comments

parent c2d800d9
#!/usr/bin/env python
#!/usr/bin/env python3
# coding: utf-8
from ch.memobase.export import foxml_export
import logging
import os
import sys
import traceback
from ch.memobase.foxml import parse_into_fedora_object
from ch.memobase.foxml import FoxmlReader
from ch.memobase.records_sets import RecordSetIdMapper
from shutil import copy2
# class FoxmlParsingError(Exception):
# pass
#
#
# def get_last_element(tree, xpath_expression, namespaces, sort_by_attrib):
# elements = tree.findall(xpath_expression, namespaces)
# if (len(elements) > 0):
# elements.sort(reverse=True, key=lambda elem:elem.attrib[sort_by_attrib])
# return elements[0]
# else:
# return None
#
#
# def parse_into_fedora_object(tree, namespaces, metadata_datastream_version):
# relation_el = metadata_datastream_version.find("foxml:xmlContent/ebucore:ebuCoreMain/ebucore:coreMetadata/ebucore:isMemberOf/ns2:relation", namespaces)
#
# if (relation_el is None or relation_el.text is None):
# return None
# else:
# fedora_object = {}
# fedora_object['record_set'] = relation_el.text
#
# identifier_el = metadata_datastream_version.find("foxml:xmlContent/ebucore:ebuCoreMain/ebucore:coreMetadata/ebucore:identifier[@typeLabel='Original']/ns2:identifier", namespaces)
# if (identifier_el is not None and identifier_el.text is not None):
# fedora_object['document_id'] = identifier_el.text
#
# return fedora_object
#
#
# def parse_foxml(file):
# namespaces = {
# 'foxml': 'info:fedora/fedora-system:def/foxml#',
# 'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
# 'dc': 'http://purl.org/dc/elements/1.1/',
# 'ebucore': 'urn:ebu:metadata-schema:ebuCore_2012',
# 'ns2': 'http://purl.org/dc/elements/1.1/'
# }
#
# tree = ET.parse(file)
#
# metadata_datastream_version = get_last_element(tree, "foxml:datastream[@ID='TRANSFORMED_METADATA_0']/foxml:datastreamVersion[@LABEL='Internal Memobase Metadata']", namespaces, 'CREATED')
# if (metadata_datastream_version is None):
# return None
# else:
# return parse_into_fedora_object(tree, namespaces, metadata_datastream_version)
# Script body: hand the source directory, the output directory and the
# record-set ID mapping file to foxml_export (imported from ch.memobase.export).

# First argument to foxml_export.
# NOTE(review): presumably the root of the Fedora object store holding the
# FOXML files — confirm against foxml_export.
objectstore_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/Datenexport/objectStore'
# Disabled alternative: output location on the shared mount instead of the
# current working directory.
# output_directory = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/sftp_20210228'
# Second argument to foxml_export; relative, so it resolves against the
# directory the script is started from.
output_directory = './sftp_20210228'
# The third argument is a CSV path, also relative to the working directory.
# NOTE(review): presumably maps old record set IDs to new ones — verify
# against foxml_export.
foxml_export(objectstore_path, output_directory, "./record_sets_ids.csv")
# recordset_id_mapper = RecordSetIdMapper('./record_sets_ids.csv')
#
# if not os.path.exists(output_directory):
# os.makedirs(output_directory)
#
# for r, d, f in os.walk(objectstore_path): # r=root, d=directories, f = files
# for file in f:
# foxml_path = os.path.join(r, file)
# try:
# logging.info("Parsing FOXML file: " + foxml_path)
# foxml_reader = FoxmlReader(foxml_path)
# old_record_set_id = foxml_reader.get_recordset_id()
# logging.debug("FOXML file " + foxml_path + " belongs to record set " + old_record_set_id)
#
# new_record_set_id = recordset_id_mapper.get_new_record_set_id(old_record_set_id)
# record_set_export_path = os.path.join(output_directory, new_record_set_id)
# if not os.path.exists(record_set_export_path):
# os.makedirs(record_set_export_path)
# foxml_destination_path = os.path.join(record_set_export_path, os.path.basename(foxml_path) + ".xml")
# copy2(foxml_path, foxml_destination_path, follow_symlinks=False)
# logging.info("Exported FOXML file '" + foxml_path + "' to '" + foxml_destination_path + "'")
# except: # catch *all* exceptions
# traceback.print_exc(limit=1, file=sys.stdout)
#
# logging.info("Finished FOXML export")
#!/usr/bin/env python
#!/usr/bin/env python3
# coding: utf-8
from ch.memobase.export import foxml_export, media_export
......
#!/usr/bin/env python
#!/usr/bin/env python3
# coding: utf-8
from ch.memobase.export import media_export
......@@ -8,106 +8,3 @@ http_files_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loesc
# Third argument to media_export.
# NOTE(review): presumably the directory holding the files that were served
# via RTMP — confirm against media_export.
rtmp_files_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/Datenexport/library'
# Run the media export. exported_record_set_path and http_files_path are
# assigned earlier in this script.
media_export(exported_record_set_path, http_files_path, rtmp_files_path)
# Standard-library imports.
# The module is `sys`, not `sy`: no standard-library module is named `sy`, so
# `import sy` raises ModuleNotFoundError as soon as the statement is executed.
# The commented-out error handling in this script refers to `sys.stdout`.
import os
import sys
import traceback
import xml.etree.ElementTree as ET
from hashlib import md5
from shutil import copy2
from urllib.parse import quote
# def copy_thumbnail(datastream_store_path, thumbnail_directory, fedora_object):
# if not os.path.exists(thumbnail_directory):
# os.makedirs(thumbnail_directory)
#
# thumbnail_filename = fedora_object['document_id'] + os.path.splitext(fedora_object['thumbnail_original_filename'])[1]
# destination_path = os.path.join(thumbnail_directory, thumbnail_filename)
# thumbnail_path = calculate_data_stream_path(datastream_store_path, fedora_object['thumbnail_ref'])
# copy2(thumbnail_path, destination_path, follow_symlinks=False)
# print("Copied '{}' to '{}'".format(thumbnail_path, destination_path))
#
#
# def copy_accesscopy(datastream_store_path, accesscopy_directory, fedora_object):
# if not os.path.exists(accesscopy_directory):
# os.makedirs(accesscopy_directory)
#
# accesscopy_filename = fedora_object['document_id'] + os.path.splitext(fedora_object['accesscopy_original_filename'])[1]
# destination_path = os.path.join(accesscopy_directory, accesscopy_filename)
# accesscopy_path = calculate_data_stream_path(datastream_store_path, fedora_object['accesscopy_ref'])
# copy2(accesscopy_path, destination_path, follow_symlinks=False)
# print("Copied '{}' to '{}'".format(accesscopy_path, destination_path))
#
#
# def copy_http_resource(accesscopy_path, accesscopy_directory, fedora_object):
# if not os.path.exists(accesscopy_directory):
# os.makedirs(accesscopy_directory)
#
# locator = fedora_object['locator']
# source_path = os.path.join(http_files_path, locator[len('https://memobase.ch/files/'):])
# destination_filename = fedora_object['document_id'] + os.path.splitext(accesscopy_path)[1]
# destination_path = os.path.join(accesscopy_directory, destination_filename)
# copy2(source_path, destination_path, follow_symlinks=False)
# print("Copied '{}' to '{}'".format(source_path, destination_path))
#
#
# def copy_rtmp_resource(rtmp_files_path, accesscopy_directory, fedora_object):
# if not os.path.exists(accesscopy_directory):
# os.makedirs(accesscopy_directory)
#
# locator = fedora_object['locator']
# rtmp_rel_path = locator[len('rtmp://intstream.memobase.ch:1935/memobase/'):]
# source_filename = rtmp_rel_path[rtmp_rel_path.find(':') + 1:]
# source_path1 = os.path.join(rtmp_files_path, source_filename)
# source_path2 = os.path.join(rtmp_files_path, 'open', source_filename)
# destination_filename = fedora_object['document_id'] + os.path.splitext(source_filename)[1]
# destination_path = os.path.join(accesscopy_directory, destination_filename)
# if (os.path.isfile(source_path1)):
# copy2(source_path1, destination_path, follow_symlinks=False)
# print("Copied '{}' to '{}'".format(source_path1, destination_path))
# elif (os.path.isfile(source_path2)):
# copy2(source_path2, destination_path, follow_symlinks=False)
# print("Copied '{}' to '{}'".format(source_path2, destination_path))
# else:
# raise Exception('rtmp resource not found.')
#
#
# # r=root, d=directories, f = files
# record_set_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/Datenexport/objectStore'
# datastreamstore_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/Datenexport/datastreamStore'
# http_files_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/Datenexport/public-files'
# rtmp_files_path = '/mnt/docuteam-intern/scratch/570-8_Memobase-Datenexport_loeschen-2021_be/Datenexport/library'
#
# for foxml_file in os.listdir(objectstore_path):
# try:
# print("Parsing FOXML file: " + foxml_file)
# foxml_reader = FoxmlReader(foxml_path)
# print("Extracting files for: record set: " + fedora_object['record_set'] + ' ; document: ' + fedora_object['document_id'])
#
# record_set_path = os.path.join(record_set_path, fedora_object['record_set'])
# if not os.path.exists(record_set_path):
# os.makedirs(record_set_path)
#
# foxml_destination_path = os.path.join(record_set_path, os.path.basename(foxml_file) + ".xml")
# copy2(foxml_file, foxml_destination_path, follow_symlinks=False)
# './record_sets_ids.csv'
# if ('thumbnail_ref' in fedora_object):
# thumbnail_directory = os.path.join(record_set_path, fedora_object['record_set'], 'thumbnails')
# copy_thumbnail(datastreamstore_path, thumbnail_directory, fedora_object)
#
# accesscopy_directory = os.path.join(record_set_path, fedora_object['record_set'], 'media')
# if 'accesscopy_ref' in fedora_object:
# copy_accesscopy(datastreamstore_path, accesscopy_directory, fedora_object)
# elif 'locator' in fedora_object:
# # copy streaming resource
# if (fedora_object['locator'].startswith('https://memobase.ch/files/')):
# copy_http_resource(http_files_path, accesscopy_directory, fedora_object)
# elif (fedora_object['locator'].startswith('rtmp://intstream.memobase.ch:1935/memobase/')):
# copy_rtmp_resource(rtmp_files_path, accesscopy_directory, fedora_object)
#
# print("Successfully extracted any files")
# except: # catch *all* exceptions
# traceback.print_exc(limit=1, file=sys.stdout)
#
# print("Finished data extraction")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment