Commit 3ead9399 authored by Silvia Witzig's avatar Silvia Witzig

Merge branch 'master' into MEMO-699-Normalization-per-RecordSet

parents 97906b71 ef94c38a
#!/usr/bin/env bash
# Helm chart versions for the Memobase import-process services.
# NOTE(review): the merged file assigned several of these variables twice
# (pre- and post-merge release); only the effective (last-wins) value of each
# variable is kept here — the earlier assignments were dead code.
TFV_CHART_VERSION="0.6.0-chart"
TDT_CHART_VERSION="0.5.1-chart"
XDT_CHART_VERSION="0.2.3-chart"
MAPPER_CHART_VERSION="0.15.0-chart"
MEDIA_CHART_VERSION="0.1.4-chart"
MME_CHART_VERSION="0.1.15-chart"
NORM_CHART_VERSION="0.5.6-chart"
INGESTER_CHART_VERSION="0.2.7-chart"
......@@ -36,6 +36,22 @@
}
}
},
{
"sort_as_keyword": {
"path_match": "*.sort",
"mapping": {
"type": "keyword"
}
}
},
{
"filter_as_keyword": {
"path_match": "*.filter",
"mapping": {
"type": "keyword"
}
}
},
{
"german_fields": {
"path_match": "*.de",
......@@ -130,6 +146,9 @@
"placeFacet": {
"type": "keyword"
},
"published": {
"type": "boolean"
},
"suggest": {
"properties": {
"title": {
......
This diff is collapsed.
This diff is collapsed.
# Normalization service configuration: lookup-table locations for date,
# carrier-type and language normalization.
# NOTE(review): the merge left each key duplicated with stale "/mappings/..."
# paths shadowed by the new "/configs/..." paths; only the effective values
# are kept here.
normalizeDate:
  qualifiers: /configs/mappings/global/qualifier.txt
  certainties: /configs/mappings/global/certainty.txt
  # NOTE(review): key is spelled "singeDateMatchers" (not "singleDate...");
  # the consuming service presumably reads this exact key, so it is kept
  # verbatim — confirm before renaming.
  singeDateMatchers: /configs/mappings/global/single-date-matchers.txt
  dateRangeMatchers: /configs/mappings/global/date-range-matchers.txt
normalizeCarrierType:
  path: /configs/mappings/global/formats.csv
normalizeLanguages:
  # NOTE(review): "gloabal" looks like a typo for "global" — verify the actual
  # directory name on the mounted volume before changing it.
  path: /configs/mappings/gloabal/languages.csv
......@@ -3,7 +3,7 @@
# Pull and export the search-doc-service Helm chart (OCI registry support).
export HELM_EXPERIMENTAL_OCI=1
SEARCH_DOC_REGISTRY="cr.gitlab.switch.ch/memoriav/memobase-2020/services/elastic-services/search-doc-service"
# NOTE(review): the merge left a stale duplicate assignment (0.5.0-chart)
# that was immediately overwritten; only the effective version is kept.
SEARCH_DOC_CHART_VERSION="0.5.4-chart"
# Quote expansions so an unexpected space/glob char cannot split the ref.
helm chart pull "${SEARCH_DOC_REGISTRY}:${SEARCH_DOC_CHART_VERSION}"
helm chart export "${SEARCH_DOC_REGISTRY}:${SEARCH_DOC_CHART_VERSION}" -d charts/
......
#!/usr/bin/env bash
# Recreate the format-facet-labels ConfigMap from the generated labels.csv.
# --ignore-not-found: don't emit an error on the very first run, when the
# ConfigMap does not exist yet.
kubectl delete configmap format-facet-labels --ignore-not-found
kubectl create configmap format-facet-labels --from-file labels.csv
# Fetches the rdfs:label values of a single Wikidata entity.
# The literal token PLACEHOLDER is substituted with a Q-id (e.g. Q11424)
# by the accompanying Python script before the query is sent.
SELECT ?item
WHERE
{
wd:PLACEHOLDER rdfs:label ?item .
# Keep only the German, French and Italian labels.
FILTER(lang(?item) = "de" || lang(?item) = "fr" || lang(?item) = "it" )
}
\ No newline at end of file
import csv


def collect_ids(rows):
    """Collect Wikidata Q-ids and free-text labels from format-mapping rows.

    Each data row is expected to hold Q-ids in columns 1-6 and a free-text
    label in column 7; the header row is recognised by its first cell being
    'wert' and skipped. Empty cells are dropped from both result sets.

    Returns a tuple ``(q_ids, labels)`` of two sets.
    """
    q_ids = set()
    labels = set()
    for row in rows:
        if row[0] == 'wert':  # header row
            continue
        q_ids.update(row[1:7])
        labels.add(row[7])
    # discard() instead of remove(): the original raised KeyError when no
    # empty cell happened to be present in the input.
    q_ids.discard('')
    labels.discard('')
    return q_ids, labels


if __name__ == '__main__':
    # SPARQLWrapper is third-party and only needed when run as a script
    # (requires network access), so import it lazily here.
    from SPARQLWrapper import SPARQLWrapper, JSON

    source = './../../global-configs/transforms/formats.csv'
    with open(source, 'r') as fp:
        all_qs, all_strings = collect_ids(csv.reader(fp, dialect='unix'))
    print(all_qs)
    print(len(all_qs))
    print(all_strings)

    s = SPARQLWrapper("https://query.wikidata.org/sparql",
                      agent='Python Script (University Library Basel, jonas.waeber@unibas.ch)')
    with open('query.sparql', 'r') as sp:
        request_template = sp.read()

    # Resolve each Q-id to its de/fr/it labels and write them to labels.csv.
    with open('labels.csv', 'w') as w:
        writer = csv.writer(w, dialect='unix')
        writer.writerow(['id', 'de', 'fr', 'it'])
        for q in all_qs:
            s.setQuery(request_template.replace('PLACEHOLDER', q))
            s.setReturnFormat(JSON)
            results = s.query().convert()
            lang_values = {row['item']['xml:lang']: row['item']['value']
                           for row in results['results']['bindings']}
            # Missing languages become empty cells.
            writer.writerow([q,
                             lang_values.get('de', ''),
                             lang_values.get('fr', ''),
                             lang_values.get('it', '')])
#!/usr/bin/env bash
# Recreate the language-facet-labels ConfigMap from the generated labels.csv.
# --ignore-not-found: don't emit an error on the very first run, when the
# ConfigMap does not exist yet.
kubectl delete configmap language-facet-labels --ignore-not-found
kubectl create configmap language-facet-labels --from-file labels.csv
This diff is collapsed.
# Fetches the rdfs:label values of a single Wikidata entity.
# The literal token PLACEHOLDER is substituted with a Q-id (e.g. Q188)
# by the accompanying Python script before the query is sent.
SELECT ?item
WHERE
{
wd:PLACEHOLDER rdfs:label ?item .
# Keep only the German, French and Italian labels.
FILTER(lang(?item) = "de" || lang(?item) = "fr" || lang(?item) = "it" )
}
\ No newline at end of file
import csv


def collect_ids(rows):
    """Collect Wikidata Q-ids and free-text labels from language-mapping rows.

    Each data row is expected to hold Q-ids in columns 1-6 and a free-text
    label in column 7; the header row is recognised by its first cell being
    'wert' and skipped. Empty cells are dropped from both result sets.

    Returns a tuple ``(q_ids, labels)`` of two sets.
    """
    q_ids = set()
    labels = set()
    for row in rows:
        if row[0] == 'wert':  # header row
            continue
        q_ids.update(row[1:7])
        labels.add(row[7])
    # discard() instead of remove(): the original raised KeyError when no
    # empty cell happened to be present in the input.
    q_ids.discard('')
    labels.discard('')
    return q_ids, labels


if __name__ == '__main__':
    # SPARQLWrapper is third-party and only needed when run as a script
    # (requires network access), so import it lazily here.
    from SPARQLWrapper import SPARQLWrapper, JSON

    source = './../../global-configs/transforms/languages.csv'
    with open(source, 'r') as fp:
        all_qs, all_strings = collect_ids(csv.reader(fp, dialect='unix'))
    print(all_qs)
    print(len(all_qs))
    print(all_strings)

    s = SPARQLWrapper("https://query.wikidata.org/sparql",
                      agent='Python Script (University Library Basel, jonas.waeber@unibas.ch)')
    with open('query.sparql', 'r') as sp:
        request_template = sp.read()

    # Resolve each Q-id to its de/fr/it labels and write them to labels.csv.
    with open('labels.csv', 'w') as w:
        writer = csv.writer(w, dialect='unix')
        writer.writerow(['id', 'de', 'fr', 'it'])
        for q in all_qs:
            s.setQuery(request_template.replace('PLACEHOLDER', q))
            s.setReturnFormat(JSON)
            results = s.query().convert()
            lang_values = {row['item']['xml:lang']: row['item']['value']
                           for row in results['results']['bindings']}
            # Missing languages become empty cells.
            writer.writerow([q,
                             lang_values.get('de', ''),
                             lang_values.get('fr', ''),
                             lang_values.get('it', '')])
......@@ -43,7 +43,7 @@ def generate_deployment(record_set_id: str, institution_id: str):
"fedora-ingest-service"
],
"text-file-validation": {
"appDirectory": f"./{record_set_id}"
"appDirectory": f"{record_set_id}"
},
"xml-data-transform": {
"recordTag": "record",
......
......@@ -4,7 +4,7 @@ from simple_elastic import ElasticIndex
if __name__ == '__main__':
index = ElasticIndex('documents-v7', url='localhost:8085')
index = ElasticIndex('documents-v8', url='localhost:8080')
print(f'Start: {datetime.now()}')
for items in index.scroll():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment