To mitigate brute-force attacks against GitLab accounts, we are moving all logins to SWITCH edu-ID. Please remember to link your account with your edu-ID: after November 30, 2021, login will only be possible via edu-ID. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID yet, you can create one by following this guide.

Kind regards

ElasticSearchWrapper.kt 5.75 KB
Newer Older
Jonas Waeber's avatar
Jonas Waeber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * search-doc-service
 * Copyright (C) 2020 Memoriav
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
18
19
package org.memobase.helpers

20
21
22
import com.beust.klaxon.Klaxon
import com.beust.klaxon.KlaxonException
import java.net.ConnectException
Jonas Waeber's avatar
Jonas Waeber committed
23
24
import java.net.SocketTimeoutException
import java.util.Properties
25
import kotlin.system.exitProcess
26
27
28
import org.apache.http.HttpHost
import org.apache.logging.log4j.LogManager
import org.elasticsearch.ElasticsearchException
Jonas Waeber's avatar
Jonas Waeber committed
29
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest
30
31
32
import org.elasticsearch.action.search.ClearScrollRequest
import org.elasticsearch.action.search.SearchRequest
import org.elasticsearch.action.search.SearchScrollRequest
33
34
35
36
37
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.RestClient
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.client.core.CountRequest
import org.elasticsearch.client.indices.GetIndexRequest
38
39
40
41
42
43
import org.elasticsearch.common.unit.TimeValue
import org.elasticsearch.index.query.QueryBuilders.termQuery
import org.elasticsearch.search.Scroll
import org.elasticsearch.search.builder.SearchSourceBuilder
import org.memobase.model.FacetContainer

44

Jonas Waeber's avatar
Jonas Waeber committed
45
46
47
48
/**
 * Facilitates a connection to the Elasticsearch cluster and offers convenience functions to retrieve
 * the necessary data from the documents index.
 *
 * @param settings Application settings; only the documents index name is read from it.
 * @param client High level REST client used for all requests against the cluster.
 * @param translationMappers Used to turn a raw document type value into a [FacetContainer].
 */
class ElasticSearchWrapper(
    settings: Properties,
    private val client: RestHighLevelClient,
    private val translationMappers: TranslationMappers
) {
    private val log = LogManager.getLogger("ElasticSearchWrapper")
    private val documentsIndex = settings.getProperty(KEYS.SettingsProps.elasticIndex)

    private val klaxon = Klaxon()

    /**
     * Counts the number of documents attached to a specific record set.
     *
     * @param recordSetIdentifier The memobase identifier of the record set.
     *
     * @return Number of documents, or 0 if the request fails with an [ElasticsearchException].
     */
    fun countNumberOfDocuments(recordSetIdentifier: String): Int {
        val request = CountRequest(documentsIndex)
        request.query(termQuery("recordSet.facet", recordSetIdentifier))
        return try {
            val response = client.count(request, RequestOptions.DEFAULT)
            // The response count is a Long; clamp it instead of letting toInt() wrap around silently.
            response.count.coerceAtMost(Int.MAX_VALUE.toLong()).toInt()
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage)
            0
        }
    }

    /**
     * Collects all the document types present in the index for a specific record set.
     *
     * The matching documents are read page by page with the scroll API. Every page, including the
     * one returned by the initial search request, is inspected. The scroll context is released once
     * scrolling ends, also when the request fails part-way.
     *
     * @param recordSetIdentifier: The memobase identifier of the record set.
     * @param queryField: Name of the field the identifier is stored in.
     *
     * @return A list of unique document type facet containers in order of first appearance, or an
     * empty list if the search fails with an [ElasticsearchException].
     */
    fun getDocumentTypesFromRecords(recordSetIdentifier: String, queryField: String): List<FacetContainer> {
        // Tracked outside the try block so that the finally block can release the scroll context.
        var openScrollId: String? = null
        return try {
            val resultFacets = mutableListOf<FacetContainer>()
            val seenTypes = mutableSetOf<String>()
            val scroll = Scroll(TimeValue.timeValueMinutes(1L))

            val searchSourceBuilder = SearchSourceBuilder()
            // Only the fields needed to determine the document type are fetched.
            searchSourceBuilder.fetchSource(arrayOf("id", "type.filter"), emptyArray<String>())
            searchSourceBuilder.query(termQuery(queryField, recordSetIdentifier))

            val searchRequest = SearchRequest(documentsIndex)
            searchRequest.scroll(scroll)
            searchRequest.source(searchSourceBuilder)

            var searchResponse = client.search(searchRequest, RequestOptions.DEFAULT)
            openScrollId = searchResponse.scrollId
            var searchHits = searchResponse.hits.hits

            while (searchHits != null && searchHits.isNotEmpty()) {
                // Process the current page BEFORE requesting the next one, otherwise the hits of the
                // initial search response would never be looked at.
                for (hit in searchHits) {
                    val source = hit.sourceAsString
                    try {
                        val document = klaxon.parse<DocumentResponseSource>(source)
                        if (document != null) {
                            val typeFilter = document.type.filter
                            if (typeFilter !in seenTypes) {
                                resultFacets.add(translationMappers.getDocumentType(typeFilter))
                                seenTypes.add(typeFilter)
                            }
                        }
                    } catch (ex: KlaxonException) {
                        log.error("Unable to parse $source from index $documentsIndex.")
                    }
                }

                val scrollRequest = SearchScrollRequest(openScrollId)
                scrollRequest.scroll(scroll)
                searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT)
                openScrollId = searchResponse.scrollId
                searchHits = searchResponse.hits.hits
            }
            resultFacets
        } catch (ex: ElasticsearchException) {
            log.error(ex.detailedMessage)
            emptyList()
        } finally {
            releaseScroll(openScrollId)
        }
    }

    /**
     * Releases the scroll context identified by [scrollId]. Does nothing when no scroll was opened.
     *
     * An [ElasticsearchException] raised while clearing is only logged, so a failed clean-up does
     * not discard results that were already collected.
     */
    private fun releaseScroll(scrollId: String?) {
        if (scrollId == null) return
        try {
            val clearScrollRequest = ClearScrollRequest()
            clearScrollRequest.addScrollId(scrollId)
            client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT)
        } catch (ex: ElasticsearchException) {
            log.warn("Unable to clear scroll context: ${ex.detailedMessage}")
        }
    }
}