Commit cccdfd5a authored by Günter Hipler's avatar Günter Hipler
Browse files

zip and encode with Base64 the documents

change of the index definition
parent 872c75b4
curl -XPUT "http://localhost:8080/oai-v1" -H 'Content-Type: application/json' -d'
curl -XPUT "http://mb-es1.memobase.unibas.ch:8080/oai-v1" -H 'Content-Type: application/json' -d'
curl -XPUT "http://mb-es1.memobase.unibas.ch:8080/oai-v2" -H 'Content-Type: application/json' -d'
curl -XPUT "http://localhost:8080/oai-v2" -H 'Content-Type: application/json' -d'
{
"settings": {
"number_of_replicas": 1,
......@@ -10,8 +12,9 @@ curl -XPUT "http://mb-es1.memobase.unibas.ch:8080/oai-v1" -H 'Content-Type: appl
"properties": {
"id": {"type": "keyword"},
"document": {
"type": "keyword",
"index": false
"type": "binary",
"index": false,
"store": true
},
"format": {"type": "keyword"},
"published": {"type": "boolean"},
......
JOB_ID=rico-2-edm-transfromer
KAFKA_BOOTSTRAP_SERVERS=localhost:9092,localhost:9093,localhost:9094
APPLICATION_ID=rico-2-edm-transfromer-03-22
APPLICATION_ID=rico-2-edm-transfromer-03-22-e
TOPIC_IN=fedora-output-json-records
TOPIC_OUT=edm-es-records
TOPIC_PROCESS=import-process-reporting
This diff is collapsed.
......@@ -28,3 +28,5 @@ docker run -v /home/swissbib/environment/code/repositories/memoriav/gitlab/servi
docker run -it --network host edenhill/kafkacat:1.6.0 -C -b VPN:9092 -t import-process-reporting -K '\t'
docker run -it --network host edenhill/kafkacat:1.6.0 -C -b VPN:9092 -t import-process-reporting-es | grep FATAL
docker run -d --network host rm --env-file ./env.txt rico2edm:latest
cd /home/swissbib/environment/code/repositories/memoriav/gitlab/services/elastic-services/elastic-bulk-action-service/gh
docker run -d --network host --rm --env-file ./env.txt elastic-bulk:latest
\ No newline at end of file
doppelte Einträge für die Indexierung ok??
{"id" : "rxb-003-D0023240D63E9125D22535201_mpg", "message" : "", "status" : "SUCCESS", "step" : "indexer-oai-v2", "timestamp" : "2021-03-22T17:39:24.711"}
{"id" : "rxb-003-D0023240D63E9125D22535201_mpg", "message" : "", "status" : "SUCCESS", "step" : "indexer-oai-v2", "timestamp" : "2021-03-22T17:39:42.178"}
\ No newline at end of file
......@@ -21,6 +21,7 @@
package ch.memobase.rico2edm.edm.subjects
import ch.memobase.rico2edm.rdf.writer.RdfXmlWriter
import ch.memobase.rico2edm.utils.Helper
import org.eclipse.rdf4j.model.Model
import org.eclipse.rdf4j.rio.Rio
import org.eclipse.rdf4j.rio.rdfxml.RDFXMLWriter
......@@ -28,6 +29,7 @@ import org.eclipse.rdf4j.rio.rdfxml.RDFXMLWriter
import java.io.StringWriter
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
import java.util.Base64
object ModelXMLTransformer {
private val prologPattern = "<\\?xml.*?\\?>".r
......@@ -54,7 +56,7 @@ object ModelXMLTransformer {
prologPattern.replaceFirstIn(ujson.Obj(
//is this the correct ID
"id" -> id,
"document" -> sOut.toString,
"document" -> Base64.getEncoder.encodeToString(Helper.compress(sOut.toString.getBytes)),
"format" -> format,
//we need specific rules to decide which documents are going to be published
//or we have to filter them out
......@@ -63,7 +65,7 @@ object ModelXMLTransformer {
//by now fixed values
"recordset" -> recordset,
"institution" -> institution,
"lastUpdatedDate" -> dateTimeFormatter.format(ZonedDateTime.now).toString
"lastUpdatedDate" -> dateTimeFormatter.format(ZonedDateTime.now)
).toString,"")
}
......
This diff is collapsed.
/*
* rico2edm
* Copyright (C) 2021 UB Basel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
package ch.memobase.rico2edm.utils
import java.io.ByteArrayOutputStream
import java.util.zip.Deflater
/** Small byte-level utilities used by the rico2edm transformation. */
object Helper {

  /** Compresses `data` with [[java.util.zip.Deflater]] using its default settings.
    *
    * The result can be restored with [[java.util.zip.Inflater]].
    *
    * @param data the raw bytes to compress; an empty array is allowed
    * @return the compressed bytes
    */
  def compress(data: Array[Byte]): Array[Byte] = {
    val deflater = new Deflater()
    try {
      deflater.setInput(data)
      // No further input will follow, so the deflater may flush everything it holds.
      deflater.finish()

      val outputStream = new ByteArrayOutputStream(data.length)
      val buffer = new Array[Byte](1024)
      while (!deflater.finished) {
        // deflate returns the number of compressed bytes written into the buffer
        val count = deflater.deflate(buffer)
        outputStream.write(buffer, 0, count)
      }
      outputStream.toByteArray
    } finally {
      // Release the native zlib memory right away instead of waiting for garbage
      // collection; this method may be invoked once per processed record.
      deflater.end()
    }
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment