From bc820ca89e1f74c77d72a56a112118b8fd0dcf3f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 May 2020 14:26:58 +0200 Subject: [PATCH] [Backport 7.x] Use filter_path for improving the search helpers performances (#1201) --- docs/helpers.asciidoc | 4 +- lib/Helpers.js | 36 ++++++++++++---- test/unit/helpers/scroll.test.js | 3 ++ test/unit/helpers/search.test.js | 72 ++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/docs/helpers.asciidoc b/docs/helpers.asciidoc index fa5465f48..b8aa72399 100644 --- a/docs/helpers.asciidoc +++ b/docs/helpers.asciidoc @@ -212,7 +212,8 @@ console.log(result) ---- === Search Helper -A simple wrapper around the search API. Instead of returning the entire `result` object it will return only the search documents result. +A simple wrapper around the search API. Instead of returning the entire `result` object it will return only the search documents source. +For improving the performances, this helper automatically adds `filter_path=hits.hits._source` to the querystring. [source,js] ---- @@ -281,6 +282,7 @@ for await (const result of scrollSearch) { === Scroll Documents Helper It works in the same way as the scroll search helper, but it returns only the documents instead. Note, every loop cycle will return you a single document, and you can't use the `clear` method. +For improving the performances, this helper automatically adds `filter_path=hits.hits._source` to the querystring. [source,js] ---- diff --git a/lib/Helpers.js b/lib/Helpers.js index a74cd8ef0..2c00f88d4 100644 --- a/lib/Helpers.js +++ b/lib/Helpers.js @@ -4,6 +4,8 @@ 'use strict' +/* eslint camelcase: 0 */ + const { promisify } = require('util') const { ResponseError, ConfigurationError } = require('./errors') @@ -29,11 +31,14 @@ class Helpers { /** * Runs a search operation. The only difference between client.search and this utility, * is that we are only returning the hits to the user and not the full ES response. + * This helper automatically adds `filter_path=hits.hits._source` to the querystring, + * as it will only need the documents source. * @param {object} params - The Elasticsearch's search parameters. * @param {object} options - The client optional configuration for this request. * @return {array} The documents that matched the request. */ async search (params, options) { + appendFilterPath('hits.hits._source', params, true) const response = await this[kClient].search(params, options) return this[kGetHits](response.body) } @@ -63,6 +68,8 @@ class Helpers { options.ignore = [429] } params.scroll = params.scroll || '1m' + appendFilterPath('_scroll_id', params, false) + const { method, body, index, ...querystring } = params let response = null for (let i = 0; i < maxRetries; i++) { @@ -74,33 +81,31 @@ class Helpers { throw new ResponseError(response) } - let scrollId = response.body._scroll_id + let scroll_id = response.body._scroll_id let stop = false const clear = async () => { stop = true await this[kClient].clearScroll( - { body: { scroll_id: scrollId } }, + { body: { scroll_id } }, { ignore: [400] } ) } - while (response.body.hits.hits.length > 0) { - scrollId = response.body._scroll_id + while (response.body.hits && response.body.hits.hits.length > 0) { + scroll_id = response.body._scroll_id response.clear = clear response.documents = this[kGetHits](response.body) yield response - if (!scrollId || stop === true) { + if (!scroll_id || stop === true) { break } for (let i = 0; i < maxRetries; i++) { response = await this[kClient].scroll({ - scroll: params.scroll, - body: { - scroll_id: scrollId - } + ...querystring, + body: { scroll_id } }, options) if (response.statusCode !== 429) break await sleep(wait) @@ -120,11 +125,14 @@ class Helpers { * } * ``` * Each document is what you will find by running a scrollSearch and iterating on the hits array. + * This helper automatically adds `filter_path=hits.hits._source` to the querystring, + * as it will only need the documents source. * @param {object} params - The Elasticsearch's search parameters. * @param {object} options - The client optional configuration for this request. * @return {iterator} the async iterator */ async * scrollDocuments (params, options) { + appendFilterPath('hits.hits._source', params, true) for await (const { documents } of this.scrollSearch(params)) { for (const document of documents) { yield document @@ -428,4 +436,14 @@ class Helpers { } } +function appendFilterPath (filter, params, force) { + if (params.filter_path !== undefined) { + params.filter_path += ',' + filter + } else if (params.filterPath !== undefined) { + params.filterPath += ',' + filter + } else if (force === true) { + params.filter_path = filter + } +} + module.exports = Helpers diff --git a/test/unit/helpers/scroll.test.js b/test/unit/helpers/scroll.test.js index 1ef1a6cb1..5571265de 100644 --- a/test/unit/helpers/scroll.test.js +++ b/test/unit/helpers/scroll.test.js @@ -183,6 +183,8 @@ test('Scroll search (retry throws later)', async t => { var count = 0 const MockConnection = connection.buildMockConnection({ onRequest (params) { + // filter_path should not be added if is not already present + t.strictEqual(params.querystring, 'scroll=1m') if (count > 1) { count += 1 return { body: {}, statusCode: 429 } @@ -232,6 +234,7 @@ test('Scroll search documents', async t => { var count = 0 const MockConnection = connection.buildMockConnection({ onRequest (params) { + t.strictEqual(params.querystring, 'filter_path=hits.hits._source%2C_scroll_id&scroll=1m') return { body: { _scroll_id: count === 3 ? undefined : 'id', diff --git a/test/unit/helpers/search.test.js b/test/unit/helpers/search.test.js index 920f353dc..e1edbd48d 100644 --- a/test/unit/helpers/search.test.js +++ b/test/unit/helpers/search.test.js @@ -11,6 +11,7 @@ const { connection } = require('../../utils') test('Search should have an additional documents property', async t => { const MockConnection = connection.buildMockConnection({ onRequest (params) { + t.strictEqual(params.querystring, 'filter_path=hits.hits._source') return { body: { hits: { @@ -44,6 +45,7 @@ test('Search should have an additional documents property', async t => { test('kGetHits fallback', async t => { const MockConnection = connection.buildMockConnection({ onRequest (params) { + t.strictEqual(params.querystring, 'filter_path=hits.hits._source') return { body: {} } } }) @@ -59,3 +61,73 @@ test('kGetHits fallback', async t => { }) t.deepEqual(result, []) }) + +test('Merge filter paths (snake_case)', async t => { + const MockConnection = connection.buildMockConnection({ + onRequest (params) { + t.strictEqual(params.querystring, 'filter_path=foo%2Chits.hits._source') + return { + body: { + hits: { + hits: [ + { _source: { one: 'one' } }, + { _source: { two: 'two' } }, + { _source: { three: 'three' } } + ] + } + } + } + } + }) + + const client = new Client({ + node: 'http://localhost:9200', + Connection: MockConnection + }) + + const result = await client.helpers.search({ + index: 'test', + filter_path: 'foo', + body: { foo: 'bar' } + }) + t.deepEqual(result, [ + { one: 'one' }, + { two: 'two' }, + { three: 'three' } + ]) +}) + +test('Merge filter paths (camelCase)', async t => { + const MockConnection = connection.buildMockConnection({ + onRequest (params) { + t.strictEqual(params.querystring, 'filter_path=foo%2Chits.hits._source') + return { + body: { + hits: { + hits: [ + { _source: { one: 'one' } }, + { _source: { two: 'two' } }, + { _source: { three: 'three' } } + ] + } + } + } + } + }) + + const client = new Client({ + node: 'http://localhost:9200', + Connection: MockConnection + }) + + const result = await client.helpers.search({ + index: 'test', + filterPath: 'foo', + body: { foo: 'bar' } + }) + t.deepEqual(result, [ + { one: 'one' }, + { two: 'two' }, + { three: 'three' } + ]) +})