Due to a scheduled upgrade to version 14.10, GitLab will be unavailable on Monday 30.05., from 19:00 until 20:00.

processor.py 6.44 KB
Newer Older
Sebastian Schüpbach's avatar
Sebastian Schüpbach committed
1
import logging
Sebastian Schüpbach's avatar
Sebastian Schüpbach committed
2

3
4
5
from mediametadatatodb_app.resources.indexer import Indexer
from mediametadatatodb_app.resources.reporter import Reporter

Sebastian Schüpbach's avatar
Sebastian Schüpbach committed
6
7
8
9
10
11
12
13
14
15

class RecordProcessor:
    """Collect per-record parsing results and push them to the indexer.

    For every record id the processor keeps the message headers plus one
    entry per parsed resource (``digital_object``, ``thumbnail``,
    ``audio_snippet``).  Each entry is a dict with the keys ``data``,
    ``ok``, ``ignored`` and ``msg``.  ``index`` then inserts the collected
    data through ``self.indexer`` and reports one status per record through
    ``self.reporter``.
    """

    # (key inside a record, label used in log messages), in indexing order.
    _RESOURCES = (
        ('digital_object', 'digital object'),
        ('thumbnail', 'thumbnail'),
        ('audio_snippet', 'audio snippet'),
    )

    def __init__(self):
        self.counter = 0  # number of successfully parsed resources
        self.indexer = Indexer()
        self.processed_records = dict()  # rec_id -> headers + resource entries
        self.reporter = Reporter()

    @staticmethod
    def _entry(data, ok, ignored, msg) -> dict:
        """Build the dict stored for one resource of a record."""
        return {'data': data,
                'ok': ok,
                'ignored': ignored,
                'msg': msg}

    @staticmethod
    def _failure_msg(err) -> str:
        """Return the message stored for a failed parsing step.

        The error detail is appended only when ``err`` is truthy; the
        ``'parsing failed'`` prefix is always present.
        """
        # The conditional applies to the suffix only.  Written without
        # parentheses, the conditional expression would swallow the prefix
        # and yield '' whenever no error detail is given.
        return 'parsing failed' + (f': {err}' if err else '')

    @staticmethod
    def _parsing_failed(record, resource) -> bool:
        """Return True if ``resource`` is present in ``record`` and not ok."""
        return resource in record and not record[resource]['ok']

    @staticmethod
    def _parsing_errors(record) -> bool:
        """Return True if parsing failed for any resource of ``record``."""
        return any(RecordProcessor._parsing_failed(record, resource)
                   for resource, _ in RecordProcessor._RESOURCES)

    @staticmethod
    def _parsing_failed_digital_object(record) -> bool:
        """Return True if the record's digital object failed to parse."""
        return RecordProcessor._parsing_failed(record, 'digital_object')

    @staticmethod
    def _parsing_failed_thumbnail(record) -> bool:
        """Return True if the record's thumbnail failed to parse."""
        return RecordProcessor._parsing_failed(record, 'thumbnail')

    @staticmethod
    def _parsing_failed_audio_snippet(record) -> bool:
        """Return True if the record's audio snippet failed to parse."""
        return RecordProcessor._parsing_failed(record, 'audio_snippet')

    def new_record(self, rec_id, headers):
        """Start (or reset) the bookkeeping for ``rec_id`` with its headers."""
        self.processed_records[rec_id] = {'headers': headers}

    def digital_object_ok(self, rec_id, data):
        """Store successfully parsed digital object ``data`` for ``rec_id``."""
        logging.debug(f"Parsing of digital object resource for {rec_id} successful")
        self.counter += 1
        self.processed_records[rec_id]['digital_object'] = \
            self._entry(data, True, False, 'successful')

    def digital_object_fail(self, rec_id, err):
        """Mark the digital object of ``rec_id`` as failed.

        :param err: error detail appended to the stored message; may be
            empty or None, in which case the message is ``'parsing failed'``.
        """
        logging.warning(f"Parsing of digital object resource for {rec_id} failed")
        self.processed_records[rec_id]['digital_object'] = \
            self._entry(None, False, False, self._failure_msg(err))

    def digital_object_ignore(self, rec_id, message):
        """Mark the digital object of ``rec_id`` as ignored with ``message``."""
        logging.info(f"Digital object resource for {rec_id} ignored")
        self.processed_records[rec_id]['digital_object'] = \
            self._entry(None, True, True, message)

    def thumbnail_ok(self, rec_id, data):
        """Store successfully parsed thumbnail ``data`` for ``rec_id``."""
        logging.debug(f"Parsing of thumbnail resource for {rec_id} successful")
        self.counter += 1
        self.processed_records[rec_id]['thumbnail'] = \
            self._entry(data, True, False, 'successful')

    def thumbnail_fail(self, rec_id, err):
        """Mark the thumbnail of ``rec_id`` as failed.

        :param err: error detail appended to the stored message; may be
            empty or None, in which case the message is ``'parsing failed'``.
        """
        logging.warning(f"Parsing of thumbnail resource for {rec_id} failed")
        self.processed_records[rec_id]['thumbnail'] = \
            self._entry(None, False, False, self._failure_msg(err))

    def audio_snippet_ok(self, rec_id, data):
        """Store successfully parsed audio snippet ``data`` for ``rec_id``."""
        logging.debug(f"Parsing of audio snippet resource for {rec_id} successful")
        self.counter += 1
        self.processed_records[rec_id]['audio_snippet'] = \
            self._entry(data, True, False, 'successful')

    def audio_snippet_fail(self, rec_id, err=None):
        """Mark the audio snippet of ``rec_id`` as failed.

        :param err: optional error detail appended to the stored message,
            mirroring ``digital_object_fail`` and ``thumbnail_fail``.
        """
        logging.warning(f"Parsing of audio snippet resource for {rec_id} failed")
        self.processed_records[rec_id]['audio_snippet'] = \
            self._entry(None, False, False, self._failure_msg(err))

    def abort(self, ex):
        """Report a FATAL indexing failure caused by ``ex`` for every record."""
        logging.error("Indexing failed. Aborting...")
        for key, record in self.processed_records.items():
            self.reporter.send_message(key, 'FATAL', f'Indexing failed: {ex}',
                                       record['headers'])

    def index(self):
        """Insert all collected records in the DB and report one status each.

        Per record, every resource that is present, not ignored and parsed
        successfully is inserted, in the order digital object, thumbnail,
        audio snippet; the first failing insert stops the remaining ones.

        * insert failed: rollback, report ``FATAL`` with the indexer error
        * inserts fine, but a resource failed to parse: commit, report
          ``FATAL`` with the per-resource messages
        * otherwise: commit, report ``IGNORE`` if nothing was inserted,
          else ``SUCCESS``

        The collected records are cleared afterwards.
        """
        for key, record in self.processed_records.items():
            ok = True
            ignored = True  # stays True as long as nothing gets inserted
            err_msg = ""
            for resource, label in self._RESOURCES:
                if not ok:
                    # A previous insert failed; skip the remaining resources.
                    break
                if resource not in record:
                    continue
                entry = record[resource]
                if entry['ignored'] or not entry['ok']:
                    continue
                logging.debug(f"Indexing {label} for {key} in DB")
                ignored = False
                ok, err_msg = self.indexer.insert_in_db(entry['data'])

            if not ok:
                self.indexer.rollback()
                self.reporter.send_message(key, 'FATAL',
                                           f'Indexing failed: {err_msg}',
                                           record['headers'])
                continue

            if self._parsing_errors(record):
                status = 'FATAL'
            elif ignored:
                status = 'IGNORE'
            else:
                status = 'SUCCESS'
            summary = 'DIGITAL OBJECT: {} -- THUMBNAIL: {} -- AUDIO SNIPPET: {}'.format(
                *(record[resource]['msg'] if resource in record else 'not available'
                  for resource, _ in self._RESOURCES))
            self.indexer.commit()
            self.reporter.send_message(key, status, summary, record['headers'])
        self.processed_records.clear()