Files
elasticsearch-js/docs/examples/scroll.asciidoc
Tomas Della Vedova d7836a16af Client helpers (#1107)
* Added client helpers

* Updated test

* The search helper should return only the documents

* Added code comments

* Fixed bug

* Updated test

* Removed bulkSize and added flushBytes

* Updated test

* Added concurrency

* Updated test

* Added support for 429 handling in the scroll search helper

* Updated test

* Updated stats count

* Updated test

* Fix test

* Use client maxRetries as default

* Updated type definitions

* Refactored bulk helper to be more consistent with the client api

* Updated test

* Improved error handling, added refreshOnCompletion option and forward additinal options to the bulk api

* Updated type definitions

* Updated test

* Fixed test on Node v8

* Updated test

* Added TODO

* Updated docs

* Added Node v8 note

* Updated scripts

* Removed useless files

* Added helpers to integration test

* Fix cli argument position

* Moar fixes

* Test run elasticsearch in github actions

* Use master action version

* Add vm.max_map_count step

* Test new action setup

* Added Configure sysctl limits step

* Updated action to latest version

* Don't run helpers integration test in jenkins

* Run helpers integratino test also with Node v10

* Updated docs

* Updated docs

* Updated helpers type definitions

* Added test for helpers type definitions

* Added license header
2020-03-23 17:43:10 +01:00

193 lines
4.7 KiB
Plaintext

[[scroll_examples]]
== Scroll
While a search request returns a single “page” of results, the scroll API can be
used to retrieve large numbers of results (or even all results) from a single
search request, in much the same way as you would use a cursor on a traditional
database.
Scrolling is not intended for real time user requests, but rather for processing
large amounts of data, for example in order to reindex the contents of one index
into a new index with a different configuration.
NOTE: The results that are returned from a scroll request reflect the state of
the index at the time that the initial search request was made, like a snapshot
in time. Subsequent changes to documents (index, update or delete) will only
affect later search requests.
In order to use scrolling, the initial search request should specify the scroll
parameter in the query string, which tells {es} how long it should keep the
“search context” alive.
NOTE: Did you know that we provide an helper for sending scroll requests? You can find it {jsclient}/client-helpers.html[here].
[source,js]
----
'use strict'
const { Client } = require('@elastic/elasticsearch')
const client = new Client({ node: 'http://localhost:9200' })
async function run () {
const allQuotes = []
const responseQueue = []
// Let's index some data!
const { body: bulkResponse } = await client.bulk({
// here we are forcing an index refresh,
// otherwise we will not get any result
// in the consequent search
refresh: true,
body: [
// operation to perform
{ index: { _index: 'game-of-thrones' } },
// the document to index
{
character: 'Ned Stark',
quote: 'Winter is coming.'
},
{ index: { _index: 'game-of-thrones' } },
{
character: 'Daenerys Targaryen',
quote: 'I am the blood of the dragon.'
},
{ index: { _index: 'game-of-thrones' } },
{
character: 'Tyrion Lannister',
quote: 'A mind needs books like a sword needs a whetstone.'
}
]
})
if (bulkResponse.errors) {
console.log(bulkResponse)
process.exit(1)
}
// start things off by searching, setting a scroll timeout, and pushing
// our first response into the queue to be processed
const response = await client.search({
index: 'game-of-thrones',
// keep the search results "scrollable" for 30 seconds
scroll: '30s',
// for the sake of this example, we will get only one result per search
size: 1,
// filter the source to only include the quote field
_source: ['quote'],
body: {
query: {
match_all: {}
}
}
})
responseQueue.push(response)
while (responseQueue.length) {
const { body } = responseQueue.shift()
// collect the titles from this response
body.hits.hits.forEach(function (hit) {
allQuotes.push(hit._source.quote)
})
// check to see if we have collected all of the quotes
if (body.hits.total.value === allQuotes.length) {
console.log('Every quote', allQuotes)
break
}
// get the next response if there are more quotes to fetch
responseQueue.push(
await client.scroll({
scrollId: body._scroll_id,
scroll: '30s'
})
)
}
}
run().catch(console.log)
----
Another cool usage of the `scroll` API can be done with Node.js ≥ 10, by using
async iteration!
[source,js]
----
'use strict'
const { Client } = require('@elastic/elasticsearch')
const client = new Client({ node: 'http://localhost:9200' })
// Scroll utility
async function * scrollSearch (params) {
var response = await client.search(params)
while (true) {
const sourceHits = response.body.hits.hits
if (sourceHits.length === 0) {
break
}
for (const hit of sourceHits) {
yield hit
}
if (!response.body._scroll_id) {
break
}
response = await client.scroll({
scrollId: response.body._scroll_id,
scroll: params.scroll
})
}
}
async function run () {
await client.bulk({
refresh: true,
body: [
{ index: { _index: 'game-of-thrones' } },
{
character: 'Ned Stark',
quote: 'Winter is coming.'
},
{ index: { _index: 'game-of-thrones' } },
{
character: 'Daenerys Targaryen',
quote: 'I am the blood of the dragon.'
},
{ index: { _index: 'game-of-thrones' } },
{
character: 'Tyrion Lannister',
quote: 'A mind needs books like a sword needs a whetstone.'
}
]
})
const params = {
index: 'game-of-thrones',
scroll: '30s',
size: 1,
_source: ['quote'],
body: {
query: {
match_all: {}
}
}
}
for await (const hit of scrollSearch(params)) {
console.log(hit._source)
}
}
run().catch(console.log)
----