Use async reader for parsing Apache Arrow responses (#2788)
@@ -619,7 +619,7 @@ Added in `v8.16.0`
 ES|QL can return results in multiple binary formats, including [Apache Arrow](https://arrow.apache.org/)'s streaming format. Because it is a very efficient format to read, it can be valuable for performing high-performance in-memory analytics. And, because the response is streamed as batches of records, it can be used to produce aggregations and other calculations on larger-than-memory data sets.
 
-`toArrowReader` returns a [`RecordBatchStreamReader`](https://arrow.apache.org/docs/js/classes/Arrow_dom.RecordBatchReader.md).
+`toArrowReader` returns an [`AsyncRecordBatchStreamReader`](https://github.com/apache/arrow/blob/520ae44272d491bbb52eb3c9b84864ed7088f11a/js/src/ipc/reader.ts#L216).
 
 ```ts
 const reader = await client.helpers
@@ -627,7 +627,7 @@ const reader = await client.helpers
   .toArrowReader()
 
 // print each record as JSON
-for (const recordBatch of reader) {
+for await (const recordBatch of reader) {
   for (const record of recordBatch) {
     console.log(record.toJSON())
   }
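Since the reader is now asynchronous, the example iterates with `for await`. As a further illustration of the larger-than-memory use case mentioned in the docs paragraph above, here is a minimal sketch that aggregates batch by batch, so only one batch is held in memory at a time; the numeric `amount` column is illustrative, not part of the client API:

```ts
const reader = await client.helpers
  .esql({ query: 'FROM sample_data' })
  .toArrowReader()

// sum a single column without materializing the full result set
let sum = 0
for await (const recordBatch of reader) {
  const amounts = recordBatch.getChild('amount')
  for (const value of amounts ?? []) {
    sum += Number(value)
  }
}
console.log(`total: ${sum}`)
```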
@@ -11,7 +11,7 @@ import assert from 'node:assert'
 import * as timersPromises from 'node:timers/promises'
 import { Readable } from 'node:stream'
 import { errors, TransportResult, TransportRequestOptions, TransportRequestOptionsWithMeta } from '@elastic/transport'
-import { Table, TypeMap, tableFromIPC, RecordBatchStreamReader } from 'apache-arrow/Arrow.node'
+import { Table, TypeMap, tableFromIPC, AsyncRecordBatchStreamReader } from 'apache-arrow/Arrow.node'
 import Client from './client'
 import * as T from './api/types'
 import { Id } from './api/types'
@@ -135,7 +135,7 @@ export interface EsqlColumn {
 export interface EsqlHelper {
   toRecords: <TDocument>() => Promise<EsqlToRecords<TDocument>>
   toArrowTable: () => Promise<Table<TypeMap>>
-  toArrowReader: () => Promise<RecordBatchStreamReader>
+  toArrowReader: () => Promise<AsyncRecordBatchStreamReader>
 }
 
 export interface EsqlToRecords<TDocument> {
@@ -1000,7 +1000,7 @@ export default class Helpers {
       return tableFromIPC(response)
     },
 
-    async toArrowReader (): Promise<RecordBatchStreamReader> {
+    async toArrowReader (): Promise<AsyncRecordBatchStreamReader> {
       if (metaHeader !== null) {
         reqOptions.headers = reqOptions.headers ?? {}
         reqOptions.headers['x-elastic-client-meta'] = `${metaHeader as string},h=qa`
@@ -1009,9 +1009,9 @@ export default class Helpers {
 
       params.format = 'arrow'
 
-      // @ts-expect-error the return type will be ArrayBuffer when the format is set to 'arrow'
-      const response: ArrayBuffer = await client.esql.query(params, reqOptions)
-      return RecordBatchStreamReader.from(response)
+      // @ts-expect-error response is a Readable when asStream is true
+      const response: Readable = await client.esql.query(params, reqOptions)
+      return await AsyncRecordBatchStreamReader.from(Readable.from(response))
     }
   }
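The substance of the change is in the last three lines above: instead of buffering the whole response body as an `ArrayBuffer` and handing it to the synchronous `RecordBatchStreamReader`, the helper now asks the transport for a `Readable` and wraps it in an `AsyncRecordBatchStreamReader`, so record batches are decoded as bytes arrive rather than after the full download. A standalone sketch of the same pattern, assuming only the `apache-arrow` package (the writer here just fabricates IPC bytes to stand in for an ES|QL response body):

```ts
import { Readable } from 'node:stream'
import {
  RecordBatchReader,
  RecordBatchStreamWriter,
  tableFromJSON
} from 'apache-arrow'

async function demo (): Promise<void> {
  // fabricate an Arrow IPC stream to stand in for a real response body
  const table = tableFromJSON([{ amount: 1.5 }, { amount: 2.5 }])
  const ipcBytes = await RecordBatchStreamWriter.writeAll(table).toUint8Array()

  // given an async byte source such as a Node Readable,
  // RecordBatchReader.from resolves to an async stream reader
  const reader = await RecordBatchReader.from(Readable.from([ipcBytes]))

  // batches decode incrementally as chunks arrive from the stream
  for await (const batch of reader) {
    console.log(batch.numRows)
  }
}

demo().catch(console.error)
```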
@@ -182,17 +182,28 @@ test('ES|QL helper', t => {
     t.end()
   })
 
-  test('toArrowReader', t => {
-    t.test('Parses a binary response into an Arrow stream reader', async t => {
-      const binaryContent = '/////zABAAAQAAAAAAAKAA4ABgANAAgACgAAAAAABAAQAAAAAAEKAAwAAAAIAAQACgAAAAgAAAAIAAAAAAAAAAIAAAB8AAAABAAAAJ7///8UAAAARAAAAEQAAAAAAAoBRAAAAAEAAAAEAAAAjP///wgAAAAQAAAABAAAAGRhdGUAAAAADAAAAGVsYXN0aWM6dHlwZQAAAAAAAAAAgv///wAAAQAEAAAAZGF0ZQAAEgAYABQAEwASAAwAAAAIAAQAEgAAABQAAABMAAAAVAAAAAAAAwFUAAAAAQAAAAwAAAAIAAwACAAEAAgAAAAIAAAAEAAAAAYAAABkb3VibGUAAAwAAABlbGFzdGljOnR5cGUAAAAAAAAAAAAABgAIAAYABgAAAAAAAgAGAAAAYW1vdW50AAAAAAAA/////7gAAAAUAAAAAAAAAAwAFgAOABUAEAAEAAwAAABgAAAAAAAAAAAABAAQAAAAAAMKABgADAAIAAQACgAAABQAAABYAAAABQAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAQAAAAAAAAAIAAAAAAAAACgAAAAAAAAAMAAAAAAAAAABAAAAAAAAADgAAAAAAAAAKAAAAAAAAAAAAAAAAgAAAAUAAAAAAAAAAAAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAHwAAAAAAAAAAAACgmZkTQAAAAGBmZiBAAAAAAAAAL0AAAADAzMwjQAAAAMDMzCtAHwAAAAAAAADV6yywkgEAANWPBquSAQAA1TPgpZIBAADV17mgkgEAANV7k5uSAQAA/////wAAAAA='
+  test('toArrowReader', async t => {
+    const testRecords = [
+      { amount: 4.900000095367432, },
+      { amount: 8.199999809265137, },
+      { amount: 15.5, },
+      { amount: 9.899999618530273, },
+      { amount: 13.899999618530273, },
+    ]
+
+    // build reusable Arrow table
+    const table = arrow.tableFromJSON(testRecords)
+    const rawData = await arrow.RecordBatchStreamWriter.writeAll(table).toUint8Array()
+
+    t.test('Parses a binary response into an Arrow stream reader', async t => {
       const MockConnection = connection.buildMockConnection({
         onRequest (_params) {
           return {
-            body: Buffer.from(binaryContent, 'base64'),
+            body: Buffer.from(rawData),
             statusCode: 200,
             headers: {
-              'content-type': 'application/vnd.elasticsearch+arrow+stream'
+              'content-type': 'application/vnd.elasticsearch+arrow+stream',
+              'transfer-encoding': 'chunked'
             }
           }
         }
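Note the shape of the test change: the opaque base64 fixture is replaced by IPC bytes generated at runtime from `testRecords` via `tableFromJSON` and `RecordBatchStreamWriter`, which keeps the mock body and the assertions in sync, and the added `transfer-encoding: chunked` header lets the mocked response exercise the streaming path.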
@@ -206,30 +217,8 @@ test('ES|QL helper', t => {
       const result = await client.helpers.esql({ query: 'FROM sample_data' }).toArrowReader()
       t.ok(result.isStream())
 
-      const testRecords = [
-        {
-          amount: 4.900000095367432,
-          date: 1729532586965,
-        },
-        {
-          amount: 8.199999809265137,
-          date: 1729446186965,
-        },
-        {
-          amount: 15.5,
-          date: 1729359786965,
-        },
-        {
-          amount: 9.899999618530273,
-          date: 1729273386965,
-        },
-        {
-          amount: 13.899999618530273,
-          date: 1729186986965,
-        },
-      ]
-
       let count = 0
-      for (const recordBatch of result) {
+      for await (const recordBatch of result) {
         for (const record of recordBatch) {
           t.same(record.toJSON(), testRecords[count])
           count++
@@ -240,17 +229,16 @@ test('ES|QL helper', t => {
     })
 
     t.test('ESQL helper uses correct x-elastic-client-meta helper value', async t => {
-      const binaryContent = '/////zABAAAQAAAAAAAKAA4ABgANAAgACgAAAAAABAAQAAAAAAEKAAwAAAAIAAQACgAAAAgAAAAIAAAAAAAAAAIAAAB8AAAABAAAAJ7///8UAAAARAAAAEQAAAAAAAoBRAAAAAEAAAAEAAAAjP///wgAAAAQAAAABAAAAGRhdGUAAAAADAAAAGVsYXN0aWM6dHlwZQAAAAAAAAAAgv///wAAAQAEAAAAZGF0ZQAAEgAYABQAEwASAAwAAAAIAAQAEgAAABQAAABMAAAAVAAAAAAAAwFUAAAAAQAAAAwAAAAIAAwACAAEAAgAAAAIAAAAEAAAAAYAAABkb3VibGUAAAwAAABlbGFzdGljOnR5cGUAAAAAAAAAAAAABgAIAAYABgAAAAAAAgAGAAAAYW1vdW50AAAAAAAA/////7gAAAAUAAAAAAAAAAwAFgAOABUAEAAEAAwAAABgAAAAAAAAAAAABAAQAAAAAAMKABgADAAIAAQACgAAABQAAABYAAAABQAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAQAAAAAAAAAIAAAAAAAAACgAAAAAAAAAMAAAAAAAAAABAAAAAAAAADgAAAAAAAAAKAAAAAAAAAAAAAAAAgAAAAUAAAAAAAAAAAAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAHwAAAAAAAAAAAACgmZkTQAAAAGBmZiBAAAAAAAAAL0AAAADAzMwjQAAAAMDMzCtAHwAAAAAAAADV6yywkgEAANWPBquSAQAA1TPgpZIBAADV17mgkgEAANV7k5uSAQAA/////wAAAAA='
 
       const MockConnection = connection.buildMockConnection({
         onRequest (params) {
           const header = params.headers?.['x-elastic-client-meta'] ?? ''
           t.ok(header.includes('h=qa'), `Client meta header does not include ESQL helper value: ${header}`)
           return {
-            body: Buffer.from(binaryContent, 'base64'),
+            body: Buffer.from(rawData),
             statusCode: 200,
             headers: {
-              'content-type': 'application/vnd.elasticsearch+arrow+stream'
+              'content-type': 'application/vnd.elasticsearch+arrow+stream',
+              'transfer-encoding': 'chunked'
             }
           }
         }
@@ -289,10 +277,12 @@ test('ES|QL helper', t => {
         new arrow.RecordBatch(schema, batch3.data),
       ])
 
+      const rawData = await arrow.RecordBatchStreamWriter.writeAll(table).toUint8Array()
+
       const MockConnection = connection.buildMockConnection({
         onRequest (_params) {
           return {
-            body: Buffer.from(arrow.tableToIPC(table, "stream")),
+            body: Buffer.from(rawData),
             statusCode: 200,
             headers: {
               'content-type': 'application/vnd.elasticsearch+arrow+stream'
@@ -310,7 +300,7 @@ test('ES|QL helper', t => {
       t.ok(result.isStream())
 
       let counter = 0
-      for (const batch of result) {
+      for await (const batch of result) {
        for (const row of batch) {
          counter++
          const { id, val } = row.toJSON()