From d075e6c6389f803b04b85f81f6a78e06944cd826 Mon Sep 17 00:00:00 2001 From: Spencer Alger Date: Thu, 27 Feb 2014 15:36:52 -0700 Subject: [PATCH 1/7] improvements to the log generator --- scripts/generate/logs/index.js | 24 ++++++++++--------- scripts/generate/logs/samples/index.js | 16 +++++++++++++ .../generate/logs/samples/weighted_list.js | 1 - 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/scripts/generate/logs/index.js b/scripts/generate/logs/index.js index 6defce53d..d9576c883 100644 --- a/scripts/generate/logs/index.js +++ b/scripts/generate/logs/index.js @@ -25,8 +25,10 @@ var async = require('async'); var path = require('path'); var moment = require('moment'); var makeSamples = require('./samples').make; -var startingMoment = moment().startOf('day').subtract('days', argv.days); -var endingMoment = moment().endOf('day').add('days', argv.days); + +var startingMoment = moment().utc().startOf('day').subtract('days', argv.days); +var endingMoment = moment().utc().endOf('day').add('days', argv.days); + var clientConfig = { log: { level: 'trace', @@ -42,14 +44,14 @@ if (argv.host) { } var client = new es.Client(clientConfig); +var samples = makeSamples(startingMoment, endingMoment); console.log('Generating', argv.count, 'events across ±', argv.days, 'days'); fillIndecies(function () { var actions = []; - var samples = makeSamples(startingMoment, endingMoment); - async.times(argv.count, function (i, done) { + async.timesSeries(argv.count, function (i, done) { // random date, plus less random time var date = moment(samples.randomMsInDayRange()) .utc() @@ -110,7 +112,6 @@ fillIndecies(function () { }); function fillIndecies(cb) { - var movingDate = moment(startingMoment); var indexBody = { mappings: { _default_: { @@ -143,12 +144,14 @@ function fillIndecies(cb) { }, ip: { type: 'ip' + }, + memory: { + type: 'double' } } } } - }, - indexPushActions = []; + }; function createDateIndex(indexName) { return function (done) { @@ -166,10 +169,9 @@ function fillIndecies(cb) { }; } - while (movingDate.unix() < endingMoment.unix()) { - indexPushActions.push(createDateIndex(movingDate.format('[logstash-]YYYY.MM.DD'))); - movingDate.add('day', 1); - } + var indexPushActions = samples.days.map(function (moment) { + return createDateIndex(moment.format('[logstash-]YYYY.MM.DD')); + }); async.parallel(indexPushActions, function (err, responses) { if (err) { diff --git a/scripts/generate/logs/samples/index.js b/scripts/generate/logs/samples/index.js index 8253132e3..88e45c304 100644 --- a/scripts/generate/logs/samples/index.js +++ b/scripts/generate/logs/samples/index.js @@ -9,6 +9,18 @@ exports.make = function (startingMoment, endingMoment) { var sets = {}; + sets.days = (function () { + var days = []; + var moving = startingMoment.clone(); + + while (moving <= endingMoment) { + days.push(moving.clone()); + moving.add('day', 1); + } + + return days; + }()); + sets.randomMsInDayRange = new Stochator({ min: startingMoment.toDate().getTime(), max: endingMoment.toDate().getTime() @@ -87,6 +99,10 @@ exports.make = function (startingMoment, endingMoment) { }); return _.transform(sets, function (note, set, name) { + if (name === 'days') { + return note[name] = set; + } + note[name] = _.bindKey(set, 'get'); }, {}); }; diff --git a/scripts/generate/logs/samples/weighted_list.js b/scripts/generate/logs/samples/weighted_list.js index 2babedba3..c96fa9efd 100644 --- a/scripts/generate/logs/samples/weighted_list.js +++ b/scripts/generate/logs/samples/weighted_list.js @@ -12,7 +12,6 @@ function WeightedList(list) { _.forEach(list, _.bindKey(this, 'push')); - console.log(this); } _.inherits(WeightedList, Array); From 64e5009c9b95ec86966ee68da1d9b7dd44ea6d9f Mon Sep 17 00:00:00 2001 From: Spencer Alger Date: Thu, 27 Feb 2014 15:53:12 -0700 Subject: [PATCH 2/7] regenerated the API --- docs/api_methods_1_x.asciidoc | 2 ++ docs/api_methods_master.asciidoc | 2 ++ package.json | 2 +- src/lib/apis/1_x.js | 4 ++++ src/lib/apis/master.js | 4 ++++ 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/api_methods_1_x.asciidoc b/docs/api_methods_1_x.asciidoc index a7c15c720..48796ef2f 100644 --- a/docs/api_methods_1_x.asciidoc +++ b/docs/api_methods_1_x.asciidoc @@ -2314,6 +2314,8 @@ The default method is `POST` and the usual < Date: Thu, 27 Feb 2014 16:59:19 -0700 Subject: [PATCH 3/7] set useChunkedEncodingByDefault on http requests so that forever-agent will allow them to use the socket pool --- src/lib/connectors/http.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/connectors/http.js b/src/lib/connectors/http.js index 8026e7921..521ba8d0c 100644 --- a/src/lib/connectors/http.js +++ b/src/lib/connectors/http.js @@ -164,6 +164,7 @@ HttpConnector.prototype.request = function (params, cb) { request.setNoDelay(true); request.setSocketKeepAlive(true); request.chunkedEncoding = false; + request.useChunkedEncodingByDefault = false; if (params.body) { request.setHeader('Content-Length', Buffer.byteLength(params.body, 'utf8')); From 630540c76943aeb4368e09a98d0c8569c8418a22 Mon Sep 17 00:00:00 2001 From: Spencer Alger Date: Thu, 27 Feb 2014 17:31:37 -0700 Subject: [PATCH 4/7] use the JDK7 build for master --- scripts/_utils.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/_utils.sh b/scripts/_utils.sh index 8a3dca11f..ccb6df440 100644 --- a/scripts/_utils.sh +++ b/scripts/_utils.sh @@ -71,8 +71,14 @@ function manage_es { local ES_URL="https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${ES_RELEASE}.zip" local ES_DIR="${SNAPSHOTS}/${ES_VERSION}" else + if [[ $ES_BRANCH == "master" ]]; then + local JDK='JDK7' + else + local JDK='JDK6' + fi + local ES_VERSION="${ES_BRANCH}_nightly" - local ES_URL="http://s3-us-west-2.amazonaws.com/build.elasticsearch.org/origin/$ES_BRANCH/nightly/JDK6/elasticsearch-latest-SNAPSHOT.zip" + local ES_URL="http://s3-us-west-2.amazonaws.com/build.elasticsearch.org/origin/$ES_BRANCH/nightly/$JDK/elasticsearch-latest-SNAPSHOT.zip" local DATE=`date +%Y_%m_%d` local ES_DIR="${SNAPSHOTS}/${ES_VERSION}_${DATE}" fi From 29a0a0329e8f1b4c8cd68ed8d789a296aaf9b2f9 Mon Sep 17 00:00:00 2001 From: Spencer Alger Date: Mon, 3 Mar 2014 08:05:15 -0700 Subject: [PATCH 5/7] fixed deleteByQuery example, regenerated api, closes #58 --- docs/_examples/deleteByQuery.asciidoc | 6 ++++-- docs/api_methods.asciidoc | 6 ++++-- docs/api_methods_1_0.asciidoc | 6 ++++-- docs/api_methods_1_x.asciidoc | 6 ++++-- docs/api_methods_master.asciidoc | 6 ++++-- src/lib/apis/1_0.js | 8 ++++++-- src/lib/apis/1_x.js | 8 ++++++-- src/lib/apis/master.js | 8 ++++++-- 8 files changed, 38 insertions(+), 16 deletions(-) diff --git a/docs/_examples/deleteByQuery.asciidoc b/docs/_examples/deleteByQuery.asciidoc index 476212705..d3ced3c7c 100644 --- a/docs/_examples/deleteByQuery.asciidoc +++ b/docs/_examples/deleteByQuery.asciidoc @@ -12,10 +12,12 @@ client.deleteByQuery({ .Deleting documents using the Query DSL [source,js] --------- -client.delete_by_query({ +client.deleteByQuery({ index: 'posts', body: { - term: { published: false } + query: { + term: { published: false } + } } }, function (error, response) { // ... diff --git a/docs/api_methods.asciidoc b/docs/api_methods.asciidoc index ff1714975..4db713084 100644 --- a/docs/api_methods.asciidoc +++ b/docs/api_methods.asciidoc @@ -402,10 +402,12 @@ client.deleteByQuery({ .Deleting documents using the Query DSL [source,js] --------- -client.delete_by_query({ +client.deleteByQuery({ index: 'posts', body: { - term: { published: false } + query: { + term: { published: false } + } } }, function (error, response) { // ... diff --git a/docs/api_methods_1_0.asciidoc b/docs/api_methods_1_0.asciidoc index 1c619aa03..09f85abe6 100644 --- a/docs/api_methods_1_0.asciidoc +++ b/docs/api_methods_1_0.asciidoc @@ -484,10 +484,12 @@ client.deleteByQuery({ .Deleting documents using the Query DSL [source,js] --------- -client.delete_by_query({ +client.deleteByQuery({ index: 'posts', body: { - term: { published: false } + query: { + term: { published: false } + } } }, function (error, response) { // ... diff --git a/docs/api_methods_1_x.asciidoc b/docs/api_methods_1_x.asciidoc index 48796ef2f..1763fc6bf 100644 --- a/docs/api_methods_1_x.asciidoc +++ b/docs/api_methods_1_x.asciidoc @@ -484,10 +484,12 @@ client.deleteByQuery({ .Deleting documents using the Query DSL [source,js] --------- -client.delete_by_query({ +client.deleteByQuery({ index: 'posts', body: { - term: { published: false } + query: { + term: { published: false } + } } }, function (error, response) { // ... diff --git a/docs/api_methods_master.asciidoc b/docs/api_methods_master.asciidoc index 27840a370..8c9f5af82 100644 --- a/docs/api_methods_master.asciidoc +++ b/docs/api_methods_master.asciidoc @@ -484,10 +484,12 @@ client.deleteByQuery({ .Deleting documents using the Query DSL [source,js] --------- -client.delete_by_query({ +client.deleteByQuery({ index: 'posts', body: { - term: { published: false } + query: { + term: { published: false } + } } }, function (error, response) { // ... diff --git a/src/lib/apis/1_0.js b/src/lib/apis/1_0.js index 0479b8aec..b3d53eb67 100644 --- a/src/lib/apis/1_0.js +++ b/src/lib/apis/1_0.js @@ -843,7 +843,9 @@ api.cluster.prototype.state = ca({ 'blocks', 'metadata', 'nodes', - 'routing_table' + 'routing_table', + 'master_node', + 'version' ] }, index: { @@ -861,7 +863,9 @@ api.cluster.prototype.state = ca({ 'blocks', 'metadata', 'nodes', - 'routing_table' + 'routing_table', + 'master_node', + 'version' ] } } diff --git a/src/lib/apis/1_x.js b/src/lib/apis/1_x.js index c25ac3a8a..eeed6313e 100644 --- a/src/lib/apis/1_x.js +++ b/src/lib/apis/1_x.js @@ -847,7 +847,9 @@ api.cluster.prototype.state = ca({ 'blocks', 'metadata', 'nodes', - 'routing_table' + 'routing_table', + 'master_node', + 'version' ] }, index: { @@ -865,7 +867,9 @@ api.cluster.prototype.state = ca({ 'blocks', 'metadata', 'nodes', - 'routing_table' + 'routing_table', + 'master_node', + 'version' ] } } diff --git a/src/lib/apis/master.js b/src/lib/apis/master.js index 88a30dc00..0424de35e 100644 --- a/src/lib/apis/master.js +++ b/src/lib/apis/master.js @@ -847,7 +847,9 @@ api.cluster.prototype.state = ca({ 'blocks', 'metadata', 'nodes', - 'routing_table' + 'routing_table', + 'master_node', + 'version' ] }, index: { @@ -865,7 +867,9 @@ api.cluster.prototype.state = ca({ 'blocks', 'metadata', 'nodes', - 'routing_table' + 'routing_table', + 'master_node', + 'version' ] } } From 66a399d623c2c1ff0993b2472752285bb87701c9 Mon Sep 17 00:00:00 2001 From: Spencer Alger Date: Mon, 3 Mar 2014 08:32:43 -0700 Subject: [PATCH 6/7] adjusted the 'do' action, so that it will replace $args within the body --- test/integration/yaml_suite/yaml_doc.js | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/test/integration/yaml_suite/yaml_doc.js b/test/integration/yaml_suite/yaml_doc.js index 7b8bcdad8..613abcb78 100644 --- a/test/integration/yaml_suite/yaml_doc.js +++ b/test/integration/yaml_suite/yaml_doc.js @@ -331,9 +331,32 @@ YamlDoc.prototype = { paramName = camelName; } - params[paramName] = (typeof val === 'string' && val[0] === '$') ? this.get(val) : val; + // for ercursively traversing the params to replace '$stashed' vars + var transformObject = function (vals, val, i) { + switch (typeof val) { + case 'string': + val = (val[0] === '$') ? this.get(val) : val; + break; + case 'object': + val = _.transform(val, transformObject); + } + vals[i] = val; + }.bind(this); + + // start with the initial param, only traverse traversables + switch (typeof val) { + case 'string': + val = (val[0] === '$') ? this.get(val) : val; + break; + case 'object': + val = _.transform(val, transformObject); + break; + } + + params[paramName] = val; }, {}, this); + expect(clientAction || clientActionName).to.be.a('function'); if (typeof clientAction === 'function') { From afd628c70dea9fbf2fe99a8c12625eaaac334331 Mon Sep 17 00:00:00 2001 From: Spencer Alger Date: Mon, 3 Mar 2014 10:36:56 -0700 Subject: [PATCH 7/7] General refactor of the log generator so that it will finally create indexes for all of the events it creates. It should also perform a smidge better, but the real bottleneck is the event creation. --- scripts/generate/logs/index.js | 198 +++++++++++++------------ scripts/generate/logs/samples/index.js | 12 -- 2 files changed, 100 insertions(+), 110 deletions(-) diff --git a/scripts/generate/logs/index.js b/scripts/generate/logs/index.js index d9576c883..0a061aa0f 100644 --- a/scripts/generate/logs/index.js +++ b/scripts/generate/logs/index.js @@ -20,7 +20,6 @@ var argv = require('optimist') .argv; var es = require('../../../src/elasticsearch'); -var _ = require('../../../src/lib/utils'); var async = require('async'); var path = require('path'); var moment = require('moment'); @@ -30,11 +29,11 @@ var startingMoment = moment().utc().startOf('day').subtract('days', argv.days); var endingMoment = moment().utc().endOf('day').add('days', argv.days); var clientConfig = { - log: { - level: 'trace', - type: 'file', - path: path.join(__dirname, '../../../log') - } + // log: { + // level: 'trace', + // type: 'file', + // path: path.join(__dirname, '../../../log') + // } }; if (argv.host) { @@ -48,70 +47,14 @@ var samples = makeSamples(startingMoment, endingMoment); console.log('Generating', argv.count, 'events across ±', argv.days, 'days'); -fillIndecies(function () { - var actions = []; +var indices = {}; +var events = []; +var doneEventing = false; +var eventsPerBulk = 3500; +var eventElementsPerBulk = eventsPerBulk * 2; // events are stored next to their headers, so each event has two elements; - async.timesSeries(argv.count, function (i, done) { - // random date, plus less random time - var date = moment(samples.randomMsInDayRange()) - .utc() - .startOf('day') - .add('milliseconds', samples.lessRandomMsInDay()); - - var event = { - index: date.format('[logstash-]YYYY.MM.DD'), - '@timestamp': date.toISOString(), - ip: samples.ips(), - extension: samples.extensions(), - response: samples.responseCodes(), - country: samples.countries(), - point: samples.airports(), - '@tags': [samples.tags(), samples.tags2()], - utc_time: date.toISOString(), - referer: 'http://' + samples.referrers() + '/' + samples.tags() + '/' + samples.astronauts(), - agent: samples.userAgents(), - }; - - event.clientip = event.ip; - event.bytes = event.response < 500 ? samples.lessRandomRespSize() : 0; - event.request = '/' + samples.astronauts() + '.' + event.extension; - event.memory = event.extension === 'php' ? event.bytes * 40 : 0; - if (event.memory) { - event.phpmemory = event.memory; - } - - event['@message'] = event.ip + ' - - [' + date.toISOString() + '] "GET ' + event.request + ' HTTP/1.1" ' + - event.response + ' ' + event.bytes + ' "-" "' + event.agent + '"'; - - actions.push({ - index: { - _index: event.index, - _type: samples.types(), - _id: i - } - }); - actions.push(event); - - if (actions.length === 3000 || i === argv.count - 1) { - console.info('writing', actions.length / 2, 'documents'); - client.bulk({ - body: actions - }, done); - actions = []; - } else { - done(); - } - }, function (err) { - if (err) { - throw err; - } else { - console.log('Done!'); - process.exit(); - } - }); -}); - -function fillIndecies(cb) { +function createIndex(indexName, done) { + console.log('made index', indexName); var indexBody = { mappings: { _default_: { @@ -142,6 +85,9 @@ function fillIndecies(cb) { } } }, + clientip: { + type: 'ip' + }, ip: { type: 'ip' }, @@ -153,35 +99,91 @@ function fillIndecies(cb) { } }; - function createDateIndex(indexName) { - return function (done) { - client.indices.create({ - ignore: 400, - index: indexName, - body: indexBody - }, function (err, resp) { - if (err) { - done(err); - } else { - done(null, resp.error ? 'existed' : 'created'); - } - }); - }; + client.indices.create({ + ignore: 400, + index: indexName, + body: indexBody + }, done); +} + +var bulk = async.queue(function (chunk, done) { + if (typeof chunk === 'string') { + return createIndex(chunk, done); } - var indexPushActions = samples.days.map(function (moment) { - return createDateIndex(moment.format('[logstash-]YYYY.MM.DD')); - }); + console.info('writing', chunk.length / 2, 'documents'); + client.bulk({ + body: chunk + }, done); +}, 3); - async.parallel(indexPushActions, function (err, responses) { - if (err) { - console.error(err.message = 'Unable to create indicies: ' + err.message); - console.error(err.stack); - } else { - _.each(_.groupBy(responses), function (list, did) { - console.info(list.length, 'indicies', did); - }); - cb(); - } - }); -} +bulk.drain = function () { + if (!doneEventing) { + // console.log('indexed faster than the events were created'); + return; + } + + client.close(); + console.log('done'); +}; + +async.timesSeries(argv.count, function (i, done) { + + // random date, plus less random time + var date = moment(samples.randomMsInDayRange()) + .utc() + .startOf('day') + .add('milliseconds', samples.lessRandomMsInDay()); + + var event = {}; + + event.index = date.format('[logstash-]YYYY.MM.DD'); + event['@timestamp'] = date.toISOString(); + event.ip = samples.ips(); + event.extension = samples.extensions(); + event.response = samples.responseCodes(); + event.country = samples.countries(); + event.point = samples.airports(); + event['@tags'] = [ + samples.tags(), + samples.tags2() + ]; + event.utc_time = date.toISOString(); + event.referer = 'http://' + samples.referrers() + '/' + samples.tags() + '/' + samples.astronauts(); + event.agent = samples.userAgents(); + event.clientip = event.ip; + event.bytes = event.response < 500 ? samples.lessRandomRespSize() : 0; + event.request = '/' + samples.astronauts() + '.' + event.extension; + if (event.extension === 'php') { + event.phpmemory = event.memory = event.bytes * 40; + } + event['@message'] = event.ip + ' - - [' + date.toISOString() + '] "GET ' + event.request + ' HTTP/1.1" ' + + event.response + ' ' + event.bytes + ' "-" "' + event.agent + '"'; + + + if (indices[event.index] !== true) { + bulk.push(event.index); // when it receives a string it handles that immediately + indices[event.index] = true; + } + + events.push( + { + index: { + _index: event.index, + _type: samples.types(), + _id: i + } + }, + event + ); + + // eventsPerBulk must be multiplied by 2 because each event is two elements long + if (events.length === eventElementsPerBulk || i === argv.count - 1) { + bulk.push([events.splice(0, eventElementsPerBulk)]); + } + + setImmediate(done); +}, function () { + console.log('done creating events'); + doneEventing = true; +}); \ No newline at end of file diff --git a/scripts/generate/logs/samples/index.js b/scripts/generate/logs/samples/index.js index 88e45c304..4df7ad4ee 100644 --- a/scripts/generate/logs/samples/index.js +++ b/scripts/generate/logs/samples/index.js @@ -9,18 +9,6 @@ exports.make = function (startingMoment, endingMoment) { var sets = {}; - sets.days = (function () { - var days = []; - var moving = startingMoment.clone(); - - while (moving <= endingMoment) { - days.push(moving.clone()); - moving.add('day', 1); - } - - return days; - }()); - sets.randomMsInDayRange = new Stochator({ min: startingMoment.toDate().getTime(), max: endingMoment.toDate().getTime()