Client helpers (#1107)

* Added client helpers

* Updated test

* The search helper should return only the documents

* Added code comments

* Fixed bug

* Updated test

* Removed bulkSize and added flushBytes

* Updated test

* Added concurrency

* Updated test

* Added support for 429 handling in the scroll search helper

* Updated test

* Updated stats count

* Updated test

* Fix test

* Use client maxRetries as default

* Updated type definitions

* Refactored bulk helper to be more consistent with the client api

* Updated test

* Improved error handling, added refreshOnCompletion option and forwarded additional options to the bulk api

* Updated type definitions

* Updated test

* Fixed test on Node v8

* Updated test

* Added TODO

* Updated docs

* Added Node v8 note

* Updated scripts

* Removed useless files

* Added helpers to integration test

* Fix cli argument position

* Moar fixes

* Test run elasticsearch in github actions

* Use master action version

* Add vm.max_map_count step

* Test new action setup

* Added Configure sysctl limits step

* Updated action to latest version

* Don't run helpers integration test in jenkins

* Run helpers integration test also with Node v10

* Updated docs

* Updated docs

* Updated helpers type definitions

* Added test for helpers type definitions

* Added license header
This commit is contained in:
Tomas Della Vedova
2020-03-23 17:43:10 +01:00
committed by GitHub
parent 6c82a4967e
commit d7836a16af
24 changed files with 7654 additions and 11 deletions

View File

@ -0,0 +1,189 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information
'use strict'
const { createReadStream } = require('fs')
const { join } = require('path')
const split = require('split2')
const { test, beforeEach, afterEach } = require('tap')
const { waitCluster } = require('../../utils')
const { Client } = require('../../../')
const datasetPath = join(__dirname, '..', '..', 'fixtures', 'stackoverflow.ndjson')
const INDEX = `test-helpers-${process.pid}`
const client = new Client({
node: process.env.TEST_ES_SERVER || 'http://localhost:9200'
})
beforeEach(async () => {
await waitCluster(client)
await client.indices.create({ index: INDEX })
})
afterEach(async () => {
await client.indices.delete({ index: INDEX }, { ignore: 404 })
})
test('bulk index', async t => {
const stream = createReadStream(datasetPath)
const result = await client.helpers.bulk({
datasource: stream.pipe(split()),
refreshOnCompletion: INDEX,
onDrop (doc) {
t.fail('It should not drop any document')
},
onDocument (doc) {
return {
index: { _index: INDEX }
}
}
})
t.type(result.time, 'number')
t.type(result.bytes, 'number')
t.match(result, {
total: 5000,
successful: 5000,
retry: 0,
failed: 0,
aborted: false
})
const { body } = await client.count({ index: INDEX })
t.match(body, { count: 5000 })
})
test('bulk index with custom id', async t => {
const stream = createReadStream(datasetPath)
const result = await client.helpers.bulk({
datasource: stream.pipe(split(JSON.parse)),
onDrop (doc) {
t.fail('It should not drop any document')
},
onDocument (doc) {
return {
index: {
_index: INDEX,
_id: doc.id
}
}
}
})
t.type(result.time, 'number')
t.type(result.bytes, 'number')
t.match(result, {
total: 5000,
successful: 5000,
retry: 0,
failed: 0,
aborted: false
})
const { body } = await client.get({
index: INDEX,
id: '19273860' // id of document n° 4242
})
t.strictEqual(body._index, INDEX)
t.strictEqual(body._id, '19273860')
t.strictEqual(body._source.id, '19273860')
})
test('abort the operation on document drop', async t => {
const stream = createReadStream(datasetPath)
const b = client.helpers.bulk({
datasource: stream.pipe(split(JSON.parse)),
concurrency: 1,
onDrop (doc) {
t.strictEqual(doc.status, 400)
t.strictEqual(doc.error.type, 'mapper_parsing_exception')
t.strictEqual(doc.document.id, '45924372')
b.abort()
},
onDocument (doc) {
if (doc.id === '45924372') { // id of document n° 500
// this will break the mapping
doc.title = { foo: 'bar' }
}
return {
index: {
_index: INDEX,
_id: doc.id
}
}
}
})
const result = await b
t.type(result.time, 'number')
t.type(result.bytes, 'number')
t.strictEqual(result.total - 1, result.successful)
t.match(result, {
retry: 0,
failed: 1,
aborted: true
})
})
test('bulk delete', async t => {
const indexResult = await client.helpers.bulk({
datasource: createReadStream(datasetPath).pipe(split(JSON.parse)),
refreshOnCompletion: true,
onDrop (doc) {
t.fail('It should not drop any document')
},
onDocument (doc) {
return {
index: {
_index: INDEX,
_id: doc.id
}
}
}
})
t.type(indexResult.time, 'number')
t.type(indexResult.bytes, 'number')
t.match(indexResult, {
total: 5000,
successful: 5000,
retry: 0,
failed: 0,
aborted: false
})
const { body: afterIndex } = await client.count({ index: INDEX })
t.match(afterIndex, { count: 5000 })
const deleteResult = await client.helpers.bulk({
datasource: createReadStream(datasetPath).pipe(split(JSON.parse)),
refreshOnCompletion: true,
onDrop (doc) {
t.fail('It should not drop any document')
},
onDocument (doc) {
return {
delete: {
_index: INDEX,
_id: doc.id
}
}
}
})
t.type(deleteResult.time, 'number')
t.type(deleteResult.bytes, 'number')
t.match(deleteResult, {
total: 5000,
successful: 5000,
retry: 0,
failed: 0,
aborted: false
})
const { body: afterDelete } = await client.count({ index: INDEX })
t.match(afterDelete, { count: 0 })
})

View File

@ -0,0 +1,103 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information
'use strict'
const { createReadStream } = require('fs')
const { join } = require('path')
const split = require('split2')
const { test, beforeEach, afterEach } = require('tap')
const { waitCluster } = require('../../utils')
const { Client } = require('../../../')
const INDEX = `test-helpers-${process.pid}`
const client = new Client({
node: process.env.TEST_ES_SERVER || 'http://localhost:9200'
})
beforeEach(async () => {
await waitCluster(client)
await client.indices.create({ index: INDEX })
const stream = createReadStream(join(__dirname, '..', '..', 'fixtures', 'stackoverflow.ndjson'))
const result = await client.helpers.bulk({
datasource: stream.pipe(split()),
refreshOnCompletion: true,
onDocument (doc) {
return {
index: { _index: INDEX }
}
}
})
if (result.failed > 0) {
throw new Error('Failed bulk indexing docs')
}
})
afterEach(async () => {
await client.indices.delete({ index: INDEX }, { ignore: 404 })
})
test('search helper', async t => {
const scrollSearch = client.helpers.scrollSearch({
index: INDEX,
body: {
query: {
match: {
title: 'javascript'
}
}
}
})
var count = 0
for await (const search of scrollSearch) {
count += 1
for (const doc of search.documents) {
t.true(doc.title.toLowerCase().includes('javascript'))
}
}
t.strictEqual(count, 11)
})
test('clear a scroll search', async t => {
const scrollSearch = client.helpers.scrollSearch({
index: INDEX,
body: {
query: {
match: {
title: 'javascript'
}
}
}
})
var count = 0
for await (const search of scrollSearch) {
count += 1
if (count === 2) {
search.clear()
}
}
t.strictEqual(count, 2)
})
test('scroll documents', async t => {
const scrollSearch = client.helpers.scrollDocuments({
index: INDEX,
body: {
query: {
match: {
title: 'javascript'
}
}
}
})
var count = 0
for await (const doc of scrollSearch) {
count += 1
t.true(doc.title.toLowerCase().includes('javascript'))
}
t.strictEqual(count, 106)
})

View File

@ -0,0 +1,56 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information
'use strict'
const { createReadStream } = require('fs')
const { join } = require('path')
const split = require('split2')
const { test, beforeEach, afterEach } = require('tap')
const { waitCluster } = require('../../utils')
const { Client } = require('../../../')
const INDEX = `test-helpers-${process.pid}`
const client = new Client({
node: process.env.TEST_ES_SERVER || 'http://localhost:9200'
})
beforeEach(async () => {
await waitCluster(client)
await client.indices.create({ index: INDEX })
const stream = createReadStream(join(__dirname, '..', '..', 'fixtures', 'stackoverflow.ndjson'))
const result = await client.helpers.bulk({
datasource: stream.pipe(split()),
refreshOnCompletion: true,
onDocument (doc) {
return {
index: { _index: INDEX }
}
}
})
if (result.failed > 0) {
throw new Error('Failed bulk indexing docs')
}
})
afterEach(async () => {
await client.indices.delete({ index: INDEX }, { ignore: 404 })
})
test('search helper', async t => {
const results = await client.helpers.search({
index: INDEX,
body: {
query: {
match: {
title: 'javascript'
}
}
}
})
t.strictEqual(results.length, 10)
for (const result of results) {
t.true(result.title.toLowerCase().includes('javascript'))
}
})