WIP: benchmarks (#745)

* Updated dependencies

* Updated .gitignore

* WIP: macro and micro benchmarks

* Updated benchmark suite

* Use the same suite for both macro and micro benchmarks

* WIP: benchmark report

* Updated benchmark suite

* Updated docker scripts

* Updated benchmark suite

* Updated scripts

* Updated benchmark suite

* Added split2
Author: Tomas Della Vedova
Date: 2019-03-12 16:45:49 +01:00 (committed by GitHub)
Commit: a713e28853 (parent: 3396b5d818)
9 changed files with 758 additions and 127 deletions

.gitignore

@@ -50,3 +50,5 @@ elasticsearch*
# Generated typings, we don't commit them
# because we should copy them in the main .d.ts file
api/generated.d.ts
test/benchmarks/macro/fixtures/*
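The new ignore rule keeps the macro benchmarks' large generated fixtures (such as the two-million-document stackoverflow.json used below) out of the repository.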

package.json

@@ -39,17 +39,21 @@
},
"devDependencies": {
"@types/node": "^10.12.24",
"convert-hrtime": "^2.0.0",
"dedent": "^0.7.0",
"deepmerge": "^3.1.0",
"dezalgo": "^1.0.3",
"js-yaml": "^3.12.1",
"license-checker": "^25.0.1",
"lolex": "^3.1.0",
"minimist": "^1.2.0",
"nanobench": "github:delvedor/nanobench#repetitions",
"ora": "^3.1.0",
"ora": "^3.2.0",
"pretty-hrtime": "^1.0.3",
"rimraf": "^2.6.3",
"semver": "^5.6.0",
"simple-git": "^1.107.0",
"simple-statistics": "^7.0.2",
"split2": "^3.1.0",
"standard": "^12.0.1",
"stoppable": "^1.1.0",
"tap": "^12.6.0",

[Elasticsearch Docker script]

@@ -7,5 +7,7 @@ exec docker run \
-e "repositories.url.allowed_urls=http://snapshot.*" \
-e "discovery.type=single-node" \
-p 9200:9200 \
--network=elastic \
--name=elasticsearch \
docker.elastic.co/elasticsearch/elasticsearch:7.0.0-beta1
# docker.elastic.co/elasticsearch/elasticsearch:6.6.0

scripts/kibana-docker.sh (new executable file)

@@ -0,0 +1,8 @@
#!/bin/bash
exec docker run \
--rm \
-e ELASTICSEARCH_URL="http://elasticsearch:9200" \
-p 5601:5601 \
--network=elastic \
docker.elastic.co/kibana/kibana:7.0.0-beta1
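Both containers join a user-defined bridge network named elastic, which is what lets Kibana reach Elasticsearch by container name at http://elasticsearch:9200. Assuming the network does not exist yet, it has to be created once before running either script, e.g. with docker network create elastic.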

[deleted benchmark file, replaced by the suite below]

@@ -1,125 +0,0 @@
'use strict'
const bench = require('nanobench')
const { Client } = require('../../index')
const { connection } = require('../utils')
bench('Initialization', { repetitions: 5 }, b => {
const client = new Client({ // eslint-disable-line
node: 'http://localhost:9200'
})
b.end()
})
bench('Call api with lazy loading', { repetitions: 5 }, b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
b.start()
client.info((err, result) => {
if (err) {
b.error(err)
return
}
b.end()
})
})
bench('Call api without lazy loading', { repetitions: 5 }, b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
client.info((err, result) => {
if (err) {
b.error(err)
return
}
b.start()
client.info((err, result) => {
if (err) {
b.error(err)
return
}
b.end()
})
})
})
bench('Basic get', { repetitions: 5 }, b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
// we run the method twice to skip the lazy loading overhead
client.search({
index: 'test',
type: 'doc',
q: 'foo:bar'
}, (err, result) => {
if (err) {
b.error(err)
return
}
b.start()
client.search({
index: 'test',
type: 'doc',
q: 'foo:bar'
}, (err, result) => {
if (err) {
b.error(err)
return
}
b.end()
})
})
})
bench('Basic post', { repetitions: 5 }, b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
// we run the method twice to skip the lazy loading overhead
client.search({
index: 'test',
type: 'doc',
body: {
query: {
match: { foo: 'bar' }
}
}
}, (err, result) => {
if (err) {
b.error(err)
return
}
b.start()
client.search({
index: 'test',
type: 'doc',
body: {
query: {
match: { foo: 'bar' }
}
}
}, (err, result) => {
if (err) {
b.error(err)
return
}
b.end()
})
})
})
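This nanobench-based file is superseded by the custom suite.js added at the end of this changeset; the same scenarios (initialization, lazy vs. non-lazy API calls, basic GET and POST) reappear below, rewritten on top of the new suite.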

test/benchmarks/macro/complex.bench.js (new file)

@@ -0,0 +1,101 @@
'use strict'
// This file must be run with --max-old-space-size=8192
// because we need more than 1 GB of memory
// e.g.: node --max-old-space-size=8192 complex.bench.js
const { Client } = require('../../../index')
const { statSync, createReadStream } = require('fs')
const { join } = require('path')
const split = require('split2')
const { bench, beforeEach, afterEach } = require('../suite')({
report: {
url: process.env.ES_RESULT_CLUSTER_URL,
username: process.env.ES_RESULT_CLUSTER_USERNAME,
password: process.env.ES_RESULT_CLUSTER_PASSWORD
}
})
var stackoverflow = []
const stackoverflowPath = join(
__dirname,
'fixtures',
'stackoverflow.json'
)
const stackoverflowInfo = {
name: 'stackoverflow.json',
size: statSync(join(stackoverflowPath)).size,
num_documents: 2000000
}
const INDEX = 'stackoverflow'
const node = process.env.ELASTICSEARCH_URL || 'http://localhost:9200'
const client = new Client({ node })
beforeEach(async b => {
if (stackoverflow.length === 0) {
stackoverflow = await readSOfile()
}
b.client = client
await b.client.indices.delete({ index: 'test-*' })
})
afterEach(async b => {
await b.client.indices.delete({ index: 'test-*' })
})
bench('Bulk index documents', {
warmup: 1,
measure: 1,
iterations: 1,
dataset: stackoverflowInfo,
action: 'bulk'
}, async b => {
b.start()
for (var i = 0; i < stackoverflow.length; i++) {
await b.client.bulk({ body: stackoverflow[i] })
}
b.end()
})
bench('Complex search request', {
warmup: 3,
measure: 5,
iterations: 100,
dataset: stackoverflowInfo,
action: 'search'
}, async b => {
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.search({
index: INDEX,
body: {
query: {
match: { title: 'safe' }
}
}
})
}
b.end()
})
function readSOfile () {
var i = 0
var stackoverflow = []
return new Promise((resolve, reject) => {
createReadStream(stackoverflowPath)
.pipe(split(JSON.parse))
.on('data', chunk => {
stackoverflow[i] = stackoverflow[i] || []
stackoverflow[i].push({ index: { _index: INDEX } })
stackoverflow[i].push(chunk)
// start a new chunk every 10k documents (two array entries per document)
if (stackoverflow[i].length >= 10000 * 2) {
i++
}
})
.on('error', reject)
.on('end', () => resolve(stackoverflow))
})
}
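readSOfile groups the parsed documents into chunks of 10,000, pairing every document with an action line, and the bulk benchmark ships one chunk per b.client.bulk() call. A sketch of the shape of each chunk (the client serializes the array to the newline-delimited _bulk format):

// Shape of stackoverflow[i] as built above: action/source pairs,
// 20,000 array entries per chunk of 10,000 documents.
const chunk = [
  { index: { _index: 'stackoverflow' } }, // action line
  { title: '...' },                       // parsed source document
  // ...repeated for the remaining documents in the chunk
]
// each loop iteration then sends: await b.client.bulk({ body: chunk })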

[macro benchmark: simple requests] (new file)

@@ -0,0 +1,269 @@
'use strict'
const { Client } = require('../../../index')
const { statSync } = require('fs')
const { join } = require('path')
const { bench, beforeEach, afterEach } = require('../suite')({
report: {
url: process.env.ES_RESULT_CLUSTER_URL,
username: process.env.ES_RESULT_CLUSTER_USERNAME,
password: process.env.ES_RESULT_CLUSTER_PASSWORD
}
})
const node = process.env.ELASTICSEARCH_URL || 'http://localhost:9200'
const smallDocument = require('./fixtures/small_document.json')
const smallDocumentInfo = {
name: 'small_document.json',
size: statSync(join(__dirname, 'fixtures', 'small_document.json')).size,
num_documents: 1
}
const largeDocument = require('./fixtures/large_document.json')
const largeDocumentInfo = {
name: 'large_document.json',
size: statSync(join(__dirname, 'fixtures', 'large_document.json')).size,
num_documents: 1
}
const client = new Client({ node })
beforeEach(async b => {
b.client = client
await b.client.indices.delete({ index: 'test-*' })
})
afterEach(async b => {
await b.client.indices.delete({ index: 'test-*' })
})
bench('Ping', {
warmup: 3,
measure: 5,
iterations: 100,
action: 'ping'
}, async b => {
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.ping()
}
b.end()
})
bench('Create index', {
warmup: 3,
measure: 5,
iterations: 10,
action: 'indices.create'
}, async b => {
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.indices.create({ index: `test-create-${i}` })
}
b.end()
})
bench('Index small document', {
warmup: 3,
measure: 5,
iterations: 100,
dataset: smallDocumentInfo,
action: 'create'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.create({
index,
type: '_doc',
id: i + now,
body: smallDocument
})
}
b.end()
})
bench('Index large document', {
warmup: 3,
measure: 5,
iterations: 100,
dataset: largeDocumentInfo,
action: 'create'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.create({
index,
type: '_doc',
id: i + now,
body: largeDocument
})
}
b.end()
})
bench('Get small document', {
warmup: 3,
measure: 5,
iterations: 1000,
dataset: smallDocumentInfo,
action: 'get'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
await b.client.create({
index,
type: '_doc',
id: now,
body: smallDocument
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.get({
index,
type: '_doc',
id: now
})
}
b.end()
})
bench('Get large document', {
warmup: 3,
measure: 5,
iterations: 1000,
dataset: largeDocumentInfo,
action: 'get'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
await b.client.create({
index,
type: '_doc',
id: now,
body: largeDocument
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.get({
index,
type: '_doc',
id: now
})
}
b.end()
})
bench('Search small document', {
warmup: 3,
measure: 5,
iterations: 1000,
dataset: smallDocumentInfo,
action: 'search'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
await b.client.create({
index,
type: '_doc',
id: now,
refresh: true,
body: smallDocument
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.search({
index,
type: '_doc',
body: {
query: {
match: { cuisine: 'mexican' }
}
}
})
}
b.end()
})
bench('Search large document', {
warmup: 3,
measure: 5,
iterations: 1000,
dataset: largeDocumentInfo,
action: 'search'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
await b.client.create({
index,
type: '_doc',
id: now,
refresh: true,
body: largeDocument
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.search({
index,
type: '_doc',
body: {
query: {
match: { 'user.lang': 'en' }
}
}
})
}
b.end()
})
bench('Update small document', {
warmup: 3,
measure: 5,
iterations: 100,
dataset: smallDocumentInfo,
action: 'update'
}, async b => {
const now = Date.now() + ''
const index = `test-${now}`
await b.client.indices.create({ index })
await b.client.create({
index,
type: '_doc',
id: now,
refresh: true,
body: smallDocument
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await b.client.update({
index,
type: '_doc',
id: now,
body: {
doc: { cuisine: 'italian' + i }
}
})
}
b.end()
})
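All of these benchmarks time the whole iteration loop between b.start() and b.end(); as suite.js below shows, each recorded sample is then divided by iterations, so the reported statistics are per-call milliseconds. Schematically:

// Effective schedule for { warmup: 3, measure: 5, iterations: 1000 }:
//   runs 1-3 -> executed, timing discarded (warmup)
//   runs 4-8 -> convertHrtime(b.time).milliseconds / 1000 recorded
//   finally  -> mean/median/min/max/standard deviation over the samples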

[micro benchmark: basic client operations] (new file)

@@ -0,0 +1,98 @@
'use strict'
const { bench } = require('../suite')({
report: {
url: process.env.ES_RESULT_CLUSTER_URL,
username: process.env.ES_RESULT_CLUSTER_USERNAME,
password: process.env.ES_RESULT_CLUSTER_PASSWORD
}
})
const { Client } = require('../../../index')
const { connection } = require('../../utils')
bench('Initialization', { warmup: 5, measure: 10, iterations: 1000 }, async b => {
b.start()
for (var i = 0; i < b.iterations; i++) {
const client = new Client({ // eslint-disable-line
node: 'http://localhost:9200'
})
}
b.end()
})
bench('Call api with lazy loading', { warmup: 5, measure: 10 }, async b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
b.start()
await client.info()
b.end()
})
bench('Call api without lazy loading', { warmup: 5, measure: 10 }, async b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
await client.info()
b.start()
await client.info()
b.end()
})
bench('Basic get', { warmup: 5, measure: 10, iterations: 1000 }, async b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
// we run the method twice to skip the lazy loading overhead
await client.search({
index: 'test',
type: 'doc',
q: 'foo:bar'
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await client.search({
index: 'test',
type: 'doc',
q: 'foo:bar'
})
}
b.end()
})
bench('Basic post', { warmup: 5, measure: 10, iterations: 1000 }, async b => {
const client = new Client({
node: 'http://localhost:9200',
Connection: connection.MockConnection
})
// we run the method twice to skip the lazy loading overhead
await client.search({
index: 'test',
type: 'doc',
body: {
query: {
match: { foo: 'bar' }
}
}
})
b.start()
for (var i = 0; i < b.iterations; i++) {
await client.search({
index: 'test',
type: 'doc',
body: {
query: {
match: { foo: 'bar' }
}
}
})
}
b.end()
})

test/benchmarks/suite.js (new file)

@@ -0,0 +1,272 @@
'use strict'
const { Client } = require('../../index')
const clientVersion = require('../../package.json').version
const { EventEmitter } = require('events')
const os = require('os')
const dezalgo = require('dezalgo')
const convertHrtime = require('convert-hrtime')
const Git = require('simple-git/promise')
const workq = require('workq')
const dedent = require('dedent')
const ss = require('simple-statistics')
function buildBenchmark (options = {}) {
const q = workq()
const stats = {}
const reports = []
var beforeEach = null
var afterEach = null
var setup = null
var teardown = null
function setBeforeEach (fn) {
beforeEach = fn
}
function setAfterEach (fn) {
afterEach = fn
}
function setSetup (fn) {
setup = fn
}
function setTeardown (fn) {
teardown = fn
}
function runSetup (q, done) {
if (setup !== null) {
setup(() => {
setup = null
done()
})
} else {
done()
}
}
function benchmark (title, opts, fn) {
if (fn == null) {
fn = opts
opts = {}
}
stats[title] = []
var { measure, warmup } = opts
const b = new B({ iterations: opts.iterations })
q.add(runSetup)
q.add(runBenchmark)
q.add(elaborateStats)
// Task that runs the benchmark and collects the stats
function runBenchmark (q, done) {
b.comment(`\n# ${title}`)
b.once('fail', err => {
b.comment(err)
if (b.client) {
b.client.close(done)
} else {
done()
}
})
process.nextTick(run)
async function run () {
if (beforeEach) {
try {
await beforeEach(b)
} catch (err) {
b.comment('Error: beforeEach hook has failed')
return b.fail(err)
}
}
try {
await fn(b)
} catch (err) {
return b.fail(err)
}
if (afterEach) {
try {
await afterEach(b)
} catch (err) {
b.comment('Error: afterEach hook has failed')
return b.fail(err)
}
}
// still need to warm up
if (warmup-- > 0) {
process.nextTick(run)
// save the actual measurement
} else if (measure-- > 0) {
stats[title].push(convertHrtime(b.time))
process.nextTick(run)
// calculate the statistics
} else {
done()
}
}
}
// Task that elaborates the collected stats
async function elaborateStats (q) {
const times = stats[title].map(s => s.milliseconds / b.iterations)
reports.push({
description: title,
action: opts.action,
category: opts.category || 'simple',
dataset: opts.dataset || null,
stats: {
mean: ss.mean(times),
median: ss.median(times),
min: ss.min(times),
max: ss.max(times),
standard_deviation: ss.standardDeviation(times)
},
repetitions: {
measured: opts.measure,
warmup: opts.warmup,
iterations: opts.iterations
}
})
if (b.client) {
const { body } = await b.client.nodes.stats({ metric: 'http,jvm,os' })
const esStats = body.nodes[Object.keys(body.nodes)[0]]
b.comment(dedent`
mean: ${ss.mean(times)} ms
median: ${ss.median(times)} ms
min: ${ss.min(times)} ms
max: ${ss.max(times)} ms
standard deviation: ${ss.standardDeviation(times)}
http total connections: ${esStats.http.total_opened}
jvm heap used: ${esStats.jvm.mem.heap_used_percent}%
`)
} else {
b.comment(dedent`
mean: ${ss.mean(times)} ms
median: ${ss.median(times)} ms
min: ${ss.min(times)} ms
max: ${ss.max(times)} ms
standard deviation: ${ss.standardDeviation(times)}
`)
}
}
}
q.drain(done => {
if (teardown) {
teardown(done)
} else {
done()
}
if (options.report && options.report.url) {
sendReport()
}
})
async function sendReport () {
const client = new Client({
node: {
url: new URL(options.report.url),
username: options.report.username,
password: options.report.password
}
})
const git = Git(__dirname)
const commit = await git.log(['-1'])
const branch = await git.revparse(['--abbrev-ref', 'HEAD'])
const { body: esInfo } = await client.info()
const { body: esNodes } = await client.nodes.stats({ metric: 'os' })
const results = reports.map(report => {
return {
'@timestamp': new Date(),
event: {
description: report.description,
category: report.category,
action: report.action,
duration: 0,
statistics: report.stats,
repetitions: report.repetitions,
dataset: (report.dataset && report.dataset.name) || null,
dataset_details: {
size: (report.dataset && report.dataset.size) || 0,
num_documents: (report.dataset && report.dataset.num_documents) || 0
}
},
agent: {
version: clientVersion,
name: '@elastic/elasticsearch-js',
git: {
branch: branch.slice(0, -1),
sha: commit.latest.hash,
commit_message: commit.latest.message,
repository: 'elasticsearch-js'
},
language: {
version: process.version
},
os: {
platform: `${os.platform()} ${os.release()}`,
type: os.type(),
architecture: os.arch()
}
},
server: {
version: esInfo.version.number,
nodes_info: esNodes
}
}
})
for (var i = 0; i < results.length; i++) {
await client.index({
index: 'benchmarking_results',
type: '_doc',
body: results[i]
})
}
}
return {
bench: dezalgo(benchmark),
beforeEach: setBeforeEach,
afterEach: setAfterEach,
setup: setSetup,
teardown: setTeardown
}
}
class B extends EventEmitter {
constructor (opts) {
super()
this.begin = 0
this.time = 0
this.iterations = opts.iterations || 1
this.client = null
}
start () {
this.begin = process.hrtime()
}
end () {
this.time = process.hrtime(this.begin)
}
fail (err) {
this.emit('fail', err)
}
comment (...args) {
console.log(...args)
}
}
module.exports = buildBenchmark
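For reference, a minimal consumer of this module, following the same pattern as the benchmark files above (no report option, so results are only printed, not indexed):

'use strict'
const { bench, setup, teardown } = require('./suite')({})

setup(done => done())    // optional: runs once before the first bench
teardown(done => done()) // optional: runs once after the queue drains

bench('noop loop', { warmup: 2, measure: 3, iterations: 10000 }, async b => {
  b.start()
  for (var i = 0; i < b.iterations; i++) {} // code under measurement
  b.end()
})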