Use async reader for parsing Apache Arrow responses (#2788) (#2792)

This commit is contained in:
Josh Mock
2025-04-24 14:24:17 -05:00
committed by GitHub
parent 48068562d1
commit 28e826d738
3 changed files with 39 additions and 23 deletions

View File

@ -715,7 +715,7 @@ const result = await client.helpers
ES|QL can return results in multiple binary formats, including https://arrow.apache.org/[Apache Arrow]'s streaming format. Because it is a very efficient format to read, it can be valuable for performing high-performance in-memory analytics. And, because the response is streamed as batches of records, it can be used to produce aggregations and other calculations on larger-than-memory data sets.
`toArrowReader` returns a https://arrow.apache.org/docs/js/classes/Arrow_dom.RecordBatchReader.html[`RecordBatchStreamReader`].
`toArrowReader` returns a https://github.com/apache/arrow/blob/520ae44272d491bbb52eb3c9b84864ed7088f11a/js/src/ipc/reader.ts#L216[`AsyncRecordBatchStreamReader`].
[source,ts]
----
@ -724,7 +724,7 @@ const reader = await client.helpers
.toArrowReader()
// print each record as JSON
for (const recordBatch of reader) {
for await (const recordBatch of reader) {
for (const record of recordBatch) {
console.log(record.toJSON())
}

View File

@ -25,7 +25,7 @@ import assert from 'node:assert'
import * as timersPromises from 'node:timers/promises'
import { Readable } from 'node:stream'
import { errors, TransportResult, TransportRequestOptions, TransportRequestOptionsWithMeta } from '@elastic/transport'
import { Table, TypeMap, tableFromIPC, RecordBatchStreamReader } from 'apache-arrow/Arrow.node'
import { Table, TypeMap, tableFromIPC, AsyncRecordBatchStreamReader } from 'apache-arrow/Arrow.node'
import Client from './client'
import * as T from './api/types'
import { Id } from './api/types'
@ -158,7 +158,7 @@ export interface EsqlResponse {
export interface EsqlHelper {
toRecords: <TDocument>() => Promise<EsqlToRecords<TDocument>>
toArrowTable: () => Promise<Table<TypeMap>>
toArrowReader: () => Promise<RecordBatchStreamReader>
toArrowReader: () => Promise<AsyncRecordBatchStreamReader>
}
export interface EsqlToRecords<TDocument> {
@ -1023,7 +1023,7 @@ export default class Helpers {
return tableFromIPC(response)
},
async toArrowReader (): Promise<RecordBatchStreamReader> {
async toArrowReader (): Promise<AsyncRecordBatchStreamReader> {
if (metaHeader !== null) {
reqOptions.headers = reqOptions.headers ?? {}
reqOptions.headers['x-elastic-client-meta'] = `${metaHeader as string},h=qa`
@ -1032,8 +1032,9 @@ export default class Helpers {
params.format = 'arrow'
const response = await client.esql.query(params, reqOptions)
return RecordBatchStreamReader.from(response)
// @ts-expect-error response is a Readable when asStream is true
const response: Readable = await client.esql.query(params, reqOptions)
return await AsyncRecordBatchStreamReader.from(Readable.from(response))
}
}

View File

@ -172,17 +172,28 @@ test('ES|QL helper', t => {
t.end()
})
test('toArrowReader', t => {
t.test('Parses a binary response into an Arrow stream reader', async t => {
const binaryContent = '/////zABAAAQAAAAAAAKAA4ABgANAAgACgAAAAAABAAQAAAAAAEKAAwAAAAIAAQACgAAAAgAAAAIAAAAAAAAAAIAAAB8AAAABAAAAJ7///8UAAAARAAAAEQAAAAAAAoBRAAAAAEAAAAEAAAAjP///wgAAAAQAAAABAAAAGRhdGUAAAAADAAAAGVsYXN0aWM6dHlwZQAAAAAAAAAAgv///wAAAQAEAAAAZGF0ZQAAEgAYABQAEwASAAwAAAAIAAQAEgAAABQAAABMAAAAVAAAAAAAAwFUAAAAAQAAAAwAAAAIAAwACAAEAAgAAAAIAAAAEAAAAAYAAABkb3VibGUAAAwAAABlbGFzdGljOnR5cGUAAAAAAAAAAAAABgAIAAYABgAAAAAAAgAGAAAAYW1vdW50AAAAAAAA/////7gAAAAUAAAAAAAAAAwAFgAOABUAEAAEAAwAAABgAAAAAAAAAAAABAAQAAAAAAMKABgADAAIAAQACgAAABQAAABYAAAABQAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAQAAAAAAAAAIAAAAAAAAACgAAAAAAAAAMAAAAAAAAAABAAAAAAAAADgAAAAAAAAAKAAAAAAAAAAAAAAAAgAAAAUAAAAAAAAAAAAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAHwAAAAAAAAAAAACgmZkTQAAAAGBmZiBAAAAAAAAAL0AAAADAzMwjQAAAAMDMzCtAHwAAAAAAAADV6yywkgEAANWPBquSAQAA1TPgpZIBAADV17mgkgEAANV7k5uSAQAA/////wAAAAA='
test('toArrowReader', async t => {
const testRecords = [
{ amount: 4.900000095367432, },
{ amount: 8.199999809265137, },
{ amount: 15.5, },
{ amount: 9.899999618530273, },
{ amount: 13.899999618530273, },
]
// build reusable Arrow table
const table = arrow.tableFromJSON(testRecords)
const rawData = await arrow.RecordBatchStreamWriter.writeAll(table).toUint8Array()
t.test('Parses a binary response into an Arrow stream reader', async t => {
const MockConnection = connection.buildMockConnection({
onRequest (_params) {
return {
body: Buffer.from(binaryContent, 'base64'),
body: Buffer.from(rawData),
statusCode: 200,
headers: {
'content-type': 'application/vnd.elasticsearch+arrow+stream'
'content-type': 'application/vnd.elasticsearch+arrow+stream',
'transfer-encoding': 'chunked'
}
}
}
@ -196,26 +207,28 @@ test('ES|QL helper', t => {
const result = await client.helpers.esql({ query: 'FROM sample_data' }).toArrowReader()
t.ok(result.isStream())
const recordBatch = result.next().value
t.same(recordBatch.get(0)?.toJSON(), {
amount: 4.900000095367432,
date: 1729532586965,
})
let count = 0
for await (const recordBatch of result) {
for (const record of recordBatch) {
t.same(record.toJSON(), testRecords[count])
count++
}
}
t.end()
})
t.test('ESQL helper uses correct x-elastic-client-meta helper value', async t => {
const binaryContent = '/////zABAAAQAAAAAAAKAA4ABgANAAgACgAAAAAABAAQAAAAAAEKAAwAAAAIAAQACgAAAAgAAAAIAAAAAAAAAAIAAAB8AAAABAAAAJ7///8UAAAARAAAAEQAAAAAAAoBRAAAAAEAAAAEAAAAjP///wgAAAAQAAAABAAAAGRhdGUAAAAADAAAAGVsYXN0aWM6dHlwZQAAAAAAAAAAgv///wAAAQAEAAAAZGF0ZQAAEgAYABQAEwASAAwAAAAIAAQAEgAAABQAAABMAAAAVAAAAAAAAwFUAAAAAQAAAAwAAAAIAAwACAAEAAgAAAAIAAAAEAAAAAYAAABkb3VibGUAAAwAAABlbGFzdGljOnR5cGUAAAAAAAAAAAAABgAIAAYABgAAAAAAAgAGAAAAYW1vdW50AAAAAAAA/////7gAAAAUAAAAAAAAAAwAFgAOABUAEAAEAAwAAABgAAAAAAAAAAAABAAQAAAAAAMKABgADAAIAAQACgAAABQAAABYAAAABQAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAQAAAAAAAAAIAAAAAAAAACgAAAAAAAAAMAAAAAAAAAABAAAAAAAAADgAAAAAAAAAKAAAAAAAAAAAAAAAAgAAAAUAAAAAAAAAAAAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAHwAAAAAAAAAAAACgmZkTQAAAAGBmZiBAAAAAAAAAL0AAAADAzMwjQAAAAMDMzCtAHwAAAAAAAADV6yywkgEAANWPBquSAQAA1TPgpZIBAADV17mgkgEAANV7k5uSAQAA/////wAAAAA='
const MockConnection = connection.buildMockConnection({
onRequest (params) {
const header = params.headers?.['x-elastic-client-meta'] ?? ''
t.ok(header.includes('h=qa'), `Client meta header does not include ESQL helper value: ${header}`)
return {
body: Buffer.from(binaryContent, 'base64'),
body: Buffer.from(rawData),
statusCode: 200,
headers: {
'content-type': 'application/vnd.elasticsearch+arrow+stream'
'content-type': 'application/vnd.elasticsearch+arrow+stream',
'transfer-encoding': 'chunked'
}
}
}
@ -254,10 +267,12 @@ test('ES|QL helper', t => {
new arrow.RecordBatch(schema, batch3.data),
])
const rawData = await arrow.RecordBatchStreamWriter.writeAll(table).toUint8Array()
const MockConnection = connection.buildMockConnection({
onRequest (_params) {
return {
body: Buffer.from(arrow.tableToIPC(table, "stream")),
body: Buffer.from(rawData),
statusCode: 200,
headers: {
'content-type': 'application/vnd.elasticsearch+arrow+stream'
@ -275,7 +290,7 @@ test('ES|QL helper', t => {
t.ok(result.isStream())
let counter = 0
for (const batch of result) {
for await (const batch of result) {
for (const row of batch) {
counter++
const { id, val } = row.toJSON()