Commit 78b29688 authored by Jonas Waeber
Browse files

Refactor cli scripts

parent 0247f5f7
## Migration, Configuration & Utility Repository
This repository contains configurations and scripts to manage the migration and the memobase services.
Each folder contains an additional README on what it is about.
### Import Process CLI
These files are used to run the migration of the memobase data.
The configurations for the prod and stage environments are maintained [here](https://gitlab.switch.ch/memoriav/memobase-2020/configurations/import-process).
## Files & Folders
To run these scripts, a Python installation with the `requests` package is required. The suffix of
the script name (e.g. `-prod`) determines on which infrastructure the request will be executed.
```bash
python start-prod.py record-set-memobase-id
```
To start a full migration use:
```bash
python migration-start-prod.py
```
### Prod
A list of all the import process settings used in the import processes for the production service.
### Stage
A list of all the import process settings used in the import processes for the stage service.
### Test
A list of all the configurations used in the import processes for the test service.
\ No newline at end of file
### Migration Configurations & Deployment
These files are used to run the migration of the memobase data.
The configurations for the prod and stage environments are maintained [here](https://gitlab.switch.ch/memoriav/memobase-2020/configurations/import-process).
## Files & Folders
To run these scripts, a Python installation with the `requests` package is required. The suffix of
the script name (e.g. `-prod`) determines on which infrastructure the request will be executed.
```bash
python start-prod.py record-set-memobase-id
```
To start a full migration use:
```bash
python migration-start-prod.py
```
* `institution_ids.csv` and `record_sets_ids.csv` are mappings between the old and new memobase ids.
### Prod
A list of all the configurations used in the import processes for the production service.
### Stage
A list of all the configurations used in the import processes for the stage service.
### Test
A list of all the configurations used in the import processes for the test service.
### Archived Configs
These are used for a number of datasets which were prepared for the migration with the
original Excel metadata. They were never used, but can serve as examples.
### API
Some code to execute the request.
# Local transform settings: `splitEntity` lists a delimiter per entity
# type/property, `normalizePerson` holds the person-name settings.
# NOTE(review): presumably the cfcis localTransforms.yml — confirm.
splitEntity:
- type: rico:Place
property: rico:name
delimiter: "," # NOTE: ";" is additionally present in the data
- type: rico:Language
property: rico:name
delimiter: ","
normalizePerson:
splitEntity:
type: rico:Person
property: rico:name
delimiter: ","
creationRelationName: # only tries to extract a value if a DUMMY-VALUE rico:name property is present in the relation.
pattern: "\\((?<relation>.+)\\)" # The double quotes are necessary to ensure the pattern is parsed correctly. The pattern needs to be double escaped!
language: NONE
nameOrder: "first-to-last"
singleNameIsLastName: true
nameDelimiter: SPACE
\ No newline at end of file
# Record-level mapping: each key names a target property and its value names
# where the data comes from (`const:` entries are fixed literals).
# NOTE(review): the non-const values appear to be source column headers — confirm.
record:
uri: ID informatique
identifiers: # blank node rico:Identifier
original: ID informatique
isSponsoredByMemoriav: true
type:
const: Film
title:
Titre utilisé (memobase)
titles:
main:
Titre utilisé (memobase)
descriptiveNote: # rico:descriptiveNote
fr: Description
scopeAndContent: # rico:scopeAndContent
- prefix:
value: "Titre utilisé: "
field: Titre utilisé
- prefix:
value: "Titre original: "
field: Titre original
- prefix:
value: "Titre traduit: "
field: Titre traduit
source: # rico:source
fr: Sources
genre:
- prefLabel:
fr: Genre
editorialNote:
const: "Ursprungsfeld: Genre"
placeOfCapture: # turned into a blank node rico:Place with relation rdau:P60556 "has place of capture"
name:
fr: Pays
creationDate: # blank node rico:DateSet with rico:expressedDate
date
creators: # a list of creators (with creationRelation rico:type)
- person:
name:
fr: Autor
relationName:
const: "Autor"
contributors:
- person:
name:
fr: Beteiligte mit rolle
- person:
name:
fr: Beteiligte ohne rolle
relationName:
- const: "SchauspielerIn"
- const: "DUMMY-VALUE" # needed to ensure that the creation relation name extraction becomes active!
producers: # rdau:P60441 has producer
corporateBody:
name:
fr: Société de production
languages: # blank node rico:Language with type content or caption and relation rico:hasLanguage
- content:
fr:
- Langue(s) piste(s) sonore(s)
- content:
fr:
- Langue(s) carton(s)
- caption:
fr:
- Langue(s) sous-titres
rights: # blank nodes rico:Rule with type "x"
holder: Rechtinhaber
# `physical` section of the mapping: physical characteristics, colour,
# carrier type, identifiers and rights (usage and access).
physical:
physicalCharacteristics: # rico:physicalCharacteristics
fr:
- prefix:
value: "Procede Son: "
field: PROCEDE_SON
- prefix:
value: "Métrage: "
field: Métrage
- prefix:
value: "ID Film: "
field: ID film
colour: # rdau:P60558 has colour content (provisional field)
fr: Couleur
carrierType:
fr: Format
identifiers: # blank node rico:Identifier
callNumber: Cote
rights: # blank nodes rico:Rule with type access
usage:
name:
const: In Copyright (InC) # rights statement http://rightsstatements.org/vocab/InC/1.0/
sameAs:
const: http://rightsstatements.org/vocab/InC/1.0/
access:
const: onsite
\ No newline at end of file
{
"config-maps": [
{
"path": "configurations/cfcis/mappings/localTransforms.yml",
"name": "normalization-service-local-transform-config"
},
{
"path": "configurations/cfcis/mappings/mapping.yml",
"name": "mapper-service-mapping-config"
}
],
"global": {
"processId": "p1cis",
"institutionId": "cis",
"recordSetId": "cfcis"
},
"steps": [
"text-file-validation",
"table-data-transform",
"mapper-service",
"media-metadata-extractor",
"normalization-service",
"fedora-ingest-service"
],
"text-file-validation": {
"appDirectory": "./cfcis"
},
"table-data-transform": {
"sheetIndex": 1,
"headerCount": 1,
"headerLineIndex": 1,
"identifierIndex": 24
},
"media-metadata-extractor": {
"lastJobName": "mapper-service"
},
"normalization-service": {
"lastJobName": "media-metadata-extractor",
"hasLocalTransformConfig": true
}
}
\ No newline at end of file
# Local transform settings: `splitEntity` lists a delimiter per entity
# type/property, `normalizePerson` holds the person-name settings.
# NOTE(review): presumably the fbcbr localTransforms.yml — confirm.
splitEntity:
- type: skos:Concept
property: skos:prefLabel
delimiter: ";"
- type: rico:Place
property: rico:name
delimiter: ";"
normalizePerson:
nameOrder: "last-to-first"
singleNameIsLastName: true
nameDelimiter: ","
\ No newline at end of file
# Record-level mapping: each key names a target property and its value names
# where the data comes from (`const:` entries are fixed literals).
# NOTE(review): the non-const values appear to be source column headers — confirm.
record:
uri: ID Original ID
identifiers: # blank node rico:Identifier
original: ID Original ID
isSponsoredByMemoriav: true
type:
const: Film
title:
de: Inhalt Haupttitel
titles:
main:
de: Inhalt Haupttitel
sameAs: # schema:sameAs
Permalink
abstract: # dct:abstract
de: Inhalt Beschreibung
descriptiveNote: # rico:descriptiveNote
de: Inhalt Beschreibung Bemerkung
scopeAndContent: # rico:scopeAndContent
de: Kontext Beschreibung Bemerkung
creationDate: # blank node rico:DateSet with rico:expressedDate
Kontext Erstellung
creators: # a list of creators (with creationRelation rico:type)
- person:
name:
de: Kontext AutorIn
relationName:
const: "Autor/in"
genre:
- prefLabel:
de: Inhalt Genre
editorialNote:
const: "Ursprungsfeld: Genre"
subject: # blank node skos:Concept with rico:hasSubject
- prefLabel:
de:
- Inhalt Schlagworte
editorialNote:
const: "Ursprungsfeld: Schlagworte"
placeOfCapture: # blank node rico:Place with relation rdau:P60556 "has place of capture"
name:
de: Kontext Aufnahmeort
languages: # blank node rico:Language with type content or caption and relation rico:hasLanguage
content:
de:
- Technische Informationen Tonaufnahmeverfahren
rights: # blank nodes rico:Rule with type "x"
holder:
de:
const: Heimatmuseum Arosa-Schanfigg
# `physical` section of the mapping: colour, carrier type, duration,
# physical characteristics and access rights.
physical:
colour: # rdau:P60558 has colour content (provisional field)
de: Technische Informationen Film Farbe
carrierType:
de: Technische Informationen Trägerformat (des Originals)
duration: # ebucore:duration
Technische Informationen Dauer
physicalCharacteristics: # rico:physicalCharacteristics
de: Technische Informationen Film Bemerkung
rights: # blank nodes rico:Rule with type access
access:
const: onsite
# `digital` section of the mapping: locator plus usage and access rights.
digital:
locator: # ebucore:locator
- System Information Streaming
rights: # blank nodes rico:Rule with type "x"
usage:
name:
const: Copyright not evaluated (CNE) # rights statement http://rightsstatements.org/vocab/CNE/1.0/
sameAs:
const: http://rightsstatements.org/vocab/CNE/1.0/
access:
const: private
\ No newline at end of file
{
"config-maps": [
{
"path": "configurations/fbcbr/mappings/localTransforms.yml",
"name": "normalization-service-local-transform-config"
},
{
"path": "configurations/fbcbr/mappings/mapping.yml",
"name": "mapper-service-mapping-config"
}
],
"global": {
"processId": "p1kgr",
"institutionId": "kgr",
"recordSetId": "fbcbr"
},
"steps": [
"text-file-validation",
"table-data-transform",
"mapper-service",
"media-metadata-extractor",
"normalization-service",
"fedora-ingest-service"
],
"text-file-validation": {
"appDirectory": "./fbcbr"
},
"table-data-transform": {
"sheetIndex": 1,
"headerCount": 3,
"headerLineIndex": 2,
"identifierIndex": 1
},
"media-metadata-extractor": {
"lastJobName": "mapper-service"
},
"normalization-service": {
"lastJobName": "media-metadata-extractor",
"hasLocalTransformConfig": true
}
}
\ No newline at end of file
# Local transform settings: `splitEntity` lists a delimiter per entity
# type/property, `normalizePerson` holds the person-name settings.
# NOTE(review): presumably the fbfws localTransforms.yml — confirm.
splitEntity:
- type: skos:Concept
property: skos:prefLabel
delimiter: ";"
- type: rico:Place
property: rico:name
delimiter: ";"
- type: rico:Language
property: rico:name
delimiter: ","
normalizePerson:
splitEntity:
type: rico:Person
property: rico:name
delimiter: ";"
nameOrder: "first-to-last"
singleNameIsLastName: true
nameDelimiter: SPACE
\ No newline at end of file
# Record-level mapping with per-language (de/fr/it) sources: each key names a
# target property and its value names where the data comes from
# (`const:` entries are fixed literals).
# NOTE(review): the non-const values appear to be source column headers — confirm.
record:
uri: ID
isSponsoredByMemoriav: true
type:
const: Film
title:
de: Titel_de
fr: Titel_fr
it: Titel_it
titles: # blank node rico:Title
main:
de: Titel_de
fr: Titel_fr
it: Titel_it
broadcast:
de: Sendungstitel_de
fr: Sendungstitel_fr
it: Sendungstitel_it
series:
de: Serientitel_de
fr: Serientitel_fr
it: Serientitel_it
identifiers: # blank node rico:Identifier
original:
- ID
sameAs: # schema:sameAs
- Link
producers: # rdau:P60441 has producer
corporateBody:
name:
de: Produzent_de
fr: Produzent_fr
it: Produzent_it
publishedBy: # rico:publishedBy
corporateBody:
name:
de: Verlag_de
fr: Verlag_fr
it: Verlag_it
descriptiveNote: # rico:descriptiveNote
de:
- Bemerkung_de
- O-Ton_de
fr:
- Bemerkung_fr
- O-Ton_fr
it:
- Bemerkung_it
- O-Ton_it
relatedPlaces: # blank node rico:Place with relation dcterms:spatial
name:
de: Geo_Zone_de
fr: Geo_Zone_fr
it: Geo_Zone_it
subject: # blank node skos:Concept with rico:hasSubject
- prefLabel:
de:
- Deskriptoren_de
fr:
- Deskriptoren_fr
it:
- Deskriptoren_it
editorialNote:
const: "Ursprungsfeld: Deskriptoren"
placeOfCapture: # turned into a blank node rico:Place with relation rdau:P60556 "has place of capture"
name:
de: Aufnahmeort_de
fr: Aufnahmeort_fr
it: Aufnahmeort_it
relatedAgents: # blank node rico:Agent with rico:hasSubject
person:
name:
de: Erwähnte_Personen_de
fr: Erwähnte_Personen_fr
it: Erwähnte_Personen_it
creators: # a list of creators (with creationRelation rico:type)
- corporateBody:
name:
de: Autor_de
fr: Autor_fr
it: Autor_it
relationName:
const: "Autor"
genre:
- prefLabel:
de:
- Genre_de
fr:
- Genre_fr
it:
- Genre_it
editorialNote:
const: "Ursprungsfeld: Genre"
abstract: # dct:abstract
de: Abstract_de
fr: Abstract_fr
it: Abstract_it
issuedDate: TransmissionDateStart
languages: # blank node rico:Language with type content or caption and relation rico:hasLanguage
content:
de: Kommentarsprache_de
fr: Kommentarsprache_fr
it: Kommentarsprache_it
caption:
de: Untertitel_de
fr: Untertitel_fr
it: Untertitel_it
rights: # blank nodes rico:Rule with type "x"
holder: Rechtinhaber
# TODO: NOT IMPLEMENTED YET!
#associatedRecord: # contains ID which should be replaced by link to corresponding record in Memobase
# - ID de
# - ID fr
# - ID it
# `physical` section of the mapping: carrier type, physical characteristics
# (with a per-language prefix), identifiers, access rights, duration and colour.
physical:
carrierType:
de:
- Originalträger_de
fr:
- Originalträger_fr
it:
- Originalträger_it
physicalCharacteristics: # rico:physicalCharacteristics
de:
prefix:
value: "Seitenverhältnis: "
field: Seitenverhältnis_de
fr:
prefix:
value: "Format d'image: "
field: Seitenverhältnis_fr
it:
prefix:
value: "Rapporto d'aspetto: "
field: Seitenverhältnis_it
identifiers: # blank node rico:Identifier
callNumber:
- Signatur
rights: # blank nodes rico:Rule with type "x"
access: Zugang physical
duration: # ebucore:duration
Dauer
colour: # rdau:P60558 has colour content (provisional field)
de: Farbe_de
fr: Farbe_fr
it: Farbe_it
# `digital` section of the mapping: locator plus usage and access rights.
digital:
locator: # ebucore:locator
- Link_Stream_Memobase
rights: # blank nodes rico:Rule with type "x"
usage:
name:
const: In Copyright (InC) # rights statement http://rightsstatements.org/vocab/InC/1.0/
sameAs:
const: http://rightsstatements.org/vocab/InC/1.0/
access: Zugang digital
{
"config-maps": [
{
"path": "configurations/fbfws/mappings/localTransforms.yml",
"name": "normalization-service-local-transform-config"
},
{
"path": "configurations/fbfws/mappings/mapping.yml",
"name": "mapper-service-mapping-config"
}
],
"global": {
"processId": "p1sfw",
"institutionId": "sfw",
"recordSetId": "fbfws"
},
"steps": [
"text-file-validation",
"table-data-transform",
"mapper-service",
"media-metadata-extractor",
"normalization-service",
"fedora-ingest-service"
],
"text-file-validation": {
"appDirectory": "./fbfws"
},
"table-data-transform": {
"sheetIndex": 1,
"headerCount": 1,
"headerLineIndex": 1,
"identifierIndex": 1
},
"media-metadata-extractor": {
"lastJobName": "mapper-service"
},
"normalization-service": {
"lastJobName": "media-metadata-extractor",
"hasLocalTransformConfig": true
}
}
\ No newline at end of file