Use filter_path to improve the performance of the search helpers (#1199)

This commit is contained in:
Tomas Della Vedova
2020-05-25 14:25:32 +02:00
committed by GitHub
parent d1ad7bd22b
commit 84217fc737
4 changed files with 105 additions and 10 deletions

View File

@ -4,6 +4,8 @@
'use strict'
/* eslint camelcase: 0 */
const { promisify } = require('util')
const { ResponseError, ConfigurationError } = require('./errors')
@ -29,11 +31,14 @@ class Helpers {
/**
* Runs a search operation. The only difference between client.search and this utility,
* is that we are only returning the hits to the user and not the full ES response.
* This helper automatically adds `filter_path=hits.hits._source` to the querystring,
* as it will only need the documents source.
* @param {object} params - The Elasticsearch's search parameters.
* @param {object} options - The client optional configuration for this request.
* @return {array} The documents that matched the request.
*/
async search (params, options) {
appendFilterPath('hits.hits._source', params, true)
const response = await this[kClient].search(params, options)
return this[kGetHits](response.body)
}
@ -63,6 +68,8 @@ class Helpers {
options.ignore = [429]
}
params.scroll = params.scroll || '1m'
appendFilterPath('_scroll_id', params, false)
const { method, body, index, ...querystring } = params
let response = null
for (let i = 0; i < maxRetries; i++) {
@ -74,33 +81,31 @@ class Helpers {
throw new ResponseError(response)
}
let scrollId = response.body._scroll_id
let scroll_id = response.body._scroll_id
let stop = false
const clear = async () => {
stop = true
await this[kClient].clearScroll(
{ body: { scroll_id: scrollId } },
{ body: { scroll_id } },
{ ignore: [400] }
)
}
while (response.body.hits.hits.length > 0) {
scrollId = response.body._scroll_id
while (response.body.hits && response.body.hits.hits.length > 0) {
scroll_id = response.body._scroll_id
response.clear = clear
response.documents = this[kGetHits](response.body)
yield response
if (!scrollId || stop === true) {
if (!scroll_id || stop === true) {
break
}
for (let i = 0; i < maxRetries; i++) {
response = await this[kClient].scroll({
scroll: params.scroll,
body: {
scroll_id: scrollId
}
...querystring,
body: { scroll_id }
}, options)
if (response.statusCode !== 429) break
await sleep(wait)
@ -120,11 +125,14 @@ class Helpers {
* }
* ```
* Each document is what you will find by running a scrollSearch and iterating on the hits array.
* This helper automatically adds `filter_path=hits.hits._source` to the querystring,
* as it will only need the documents source.
* @param {object} params - The Elasticsearch search parameters.
* @param {object} options - The client optional configuration for this request.
* @return {iterator} the async iterator
*/
async * scrollDocuments (params, options) {
appendFilterPath('hits.hits._source', params, true)
for await (const { documents } of this.scrollSearch(params)) {
for (const document of documents) {
yield document
@ -428,4 +436,14 @@ class Helpers {
}
}
/**
 * Adds `filter` to the response filtering option of `params`, updating `params` in place.
 *
 * The snake_case `filter_path` key takes precedence over the camelCase `filterPath`:
 * whichever one is already set is the one that gets extended. When neither is set,
 * `filter_path` is created only if `force` is `true`.
 * A filter that is already listed is not added a second time, so calling this
 * function repeatedly with the same `params` object does not produce duplicates.
 *
 * @param {string} filter - The filter to add, e.g. `hits.hits._source`.
 * @param {object} params - The request parameters to update.
 * @param {boolean} force - Whether to set `filter_path` when no filter is configured yet.
 */
function appendFilterPath (filter, params, force) {
  // `!= null` on purpose: a null value is treated like a missing one, instead of
  // being concatenated into the literal string "null,<filter>".
  let key = null
  if (params.filter_path != null) {
    key = 'filter_path'
  } else if (params.filterPath != null) {
    key = 'filterPath'
  }

  if (key === null) {
    if (force === true) {
      params.filter_path = filter
    }
    return
  }

  const current = params[key]
  const entries = Array.isArray(current) ? current : String(current).split(',')
  if (entries.includes(filter)) {
    return
  }

  // Arrays end up in the comma separated form, exactly as string concatenation would do.
  params[key] = entries.join(',') + ',' + filter
}
module.exports = Helpers