Auto-generated API code (#2691)

Elastic Machine
2025-04-04 20:22:52 +01:00
committed by GitHub
parent e36b0e5374
commit 124753aea7
5 changed files with 2157 additions and 279 deletions

View File

@@ -503,7 +503,7 @@ client.deleteByQuery({ index })
** *`default_operator` (Optional, Enum("and" | "or"))*: The default operator for query string query: `AND` or `OR`. This parameter can be used only when the `q` query string parameter is specified.
** *`df` (Optional, string)*: The field to use as default where no field prefix is given in the query string. This parameter can be used only when the `q` query string parameter is specified.
** *`expand_wildcards` (Optional, Enum("all" | "open" | "closed" | "hidden" | "none") | Enum("all" | "open" | "closed" | "hidden" | "none")[])*: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports a list of values, such as `open,hidden`.
** *`from` (Optional, number)*: Starting offset (default: 0)
** *`from` (Optional, number)*: Skips the specified number of documents.
** *`ignore_unavailable` (Optional, boolean)*: If `false`, the request returns an error if it targets a missing or closed index.
** *`lenient` (Optional, boolean)*: If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. This parameter can be used only when the `q` query string parameter is specified.
** *`preference` (Optional, string)*: The node or shard the operation should be performed on. It is random by default.
@@ -1318,6 +1318,7 @@ client.openPointInTime({ index, keep_alive })
** *`routing` (Optional, string)*: A custom value that is used to route operations to a specific shard.
** *`expand_wildcards` (Optional, Enum("all" | "open" | "closed" | "hidden" | "none") | Enum("all" | "open" | "closed" | "hidden" | "none")[])*: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports a list of values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`.
** *`allow_partial_search_results` (Optional, boolean)*: Indicates whether the point in time tolerates unavailable shards or shard failures when initially creating the PIT. If `false`, creating a point in time request when a shard is missing or unavailable will throw an exception. If `true`, the point in time will contain all the shards that are available at the time of the request.
** *`max_concurrent_shard_requests` (Optional, number)*: Maximum number of concurrent shard requests that each sub-search request executes per node.
[discrete]
=== ping
@@ -2268,7 +2269,7 @@ client.updateByQuery({ index })
** *`default_operator` (Optional, Enum("and" | "or"))*: The default operator for query string query: `AND` or `OR`. This parameter can be used only when the `q` query string parameter is specified.
** *`df` (Optional, string)*: The field to use as default where no field prefix is given in the query string. This parameter can be used only when the `q` query string parameter is specified.
** *`expand_wildcards` (Optional, Enum("all" | "open" | "closed" | "hidden" | "none") | Enum("all" | "open" | "closed" | "hidden" | "none")[])*: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports a list of values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`.
** *`from` (Optional, number)*: Starting offset (default: 0)
** *`from` (Optional, number)*: Skips the specified number of documents.
** *`ignore_unavailable` (Optional, boolean)*: If `false`, the request returns an error if it targets a missing or closed index.
** *`lenient` (Optional, boolean)*: If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. This parameter can be used only when the `q` query string parameter is specified.
** *`pipeline` (Optional, string)*: The ID of the pipeline to use to preprocess incoming documents. If the index has a default ingest pipeline specified, then setting the value to `_none` disables the default ingest pipeline for this request. If a final pipeline is configured it will always run, regardless of the value of this parameter.
@@ -7157,7 +7158,7 @@ a new date field is added instead of string.
not used at all by Elasticsearch, but can be used to store
application-specific metadata.
** *`numeric_detection` (Optional, boolean)*: Automatically map strings into numeric data types for all fields.
** *`properties` (Optional, Record<string, { type } | { boost, fielddata, index, null_value, type } | { type, enabled, null_value, boost, coerce, script, on_script_error, ignore_malformed, time_series_metric, analyzer, eager_global_ordinals, index, index_options, index_phrases, index_prefixes, norms, position_increment_gap, search_analyzer, search_quote_analyzer, term_vector, format, precision_step, locale } | { relations, eager_global_ordinals, type } | { boost, eager_global_ordinals, index, index_options, script, on_script_error, normalizer, norms, null_value, similarity, split_queries_on_whitespace, time_series_dimension, type } | { type, fields, meta, copy_to } | { type } | { positive_score_impact, type } | { positive_score_impact, type } | { analyzer, index, index_options, max_shingle_size, norms, search_analyzer, search_quote_analyzer, similarity, term_vector, type } | { analyzer, boost, eager_global_ordinals, fielddata, fielddata_frequency_filter, index, index_options, index_phrases, index_prefixes, norms, position_increment_gap, search_analyzer, search_quote_analyzer, similarity, term_vector, type } | { type } | { type, null_value } | { boost, format, ignore_malformed, index, script, on_script_error, null_value, precision_step, type } | { boost, fielddata, format, ignore_malformed, index, script, on_script_error, null_value, precision_step, locale, type } | { type, default_metric, metrics, time_series_metric } | { type, dims, element_type, index, index_options, similarity } | { boost, depth_limit, doc_values, eager_global_ordinals, index, index_options, null_value, similarity, split_queries_on_whitespace, type } | { enabled, include_in_parent, include_in_root, type } | { enabled, subobjects, type } | { type, enabled, priority, time_series_dimension } | { type, meta, inference_id, search_inference_id } | { type } | { analyzer, contexts, max_input_length, preserve_position_increments, preserve_separators, search_analyzer, type } | { value, type } | { type, index } | { path, type } | { ignore_malformed, type } | { boost, index, ignore_malformed, null_value, on_script_error, script, time_series_dimension, type } | { type } | { analyzer, boost, index, null_value, enable_position_increments, type } | { ignore_malformed, ignore_z_value, null_value, index, on_script_error, script, type } | { coerce, ignore_malformed, ignore_z_value, index, orientation, strategy, type } | { ignore_malformed, ignore_z_value, null_value, type } | { coerce, ignore_malformed, ignore_z_value, orientation, type } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value, scaling_factor } | { type, null_value } | { type, null_value } | { format, type } | { type } | { type } | { type } | { type } | { type } | { type, norms, index_options, index, null_value, rules, language, country, variant, strength, decomposition, alternate, case_level, case_first, numeric, variable_top, hiragana_quaternary_mode }>)*: Mapping for a field. For new fields, this mapping can include:
** *`properties` (Optional, Record<string, { type } | { boost, fielddata, index, null_value, ignore_malformed, script, on_script_error, time_series_dimension, type } | { type, enabled, null_value, boost, coerce, script, on_script_error, ignore_malformed, time_series_metric, analyzer, eager_global_ordinals, index, index_options, index_phrases, index_prefixes, norms, position_increment_gap, search_analyzer, search_quote_analyzer, term_vector, format, precision_step, locale } | { relations, eager_global_ordinals, type } | { boost, eager_global_ordinals, index, index_options, script, on_script_error, normalizer, norms, null_value, similarity, split_queries_on_whitespace, time_series_dimension, type } | { type, fields, meta, copy_to } | { type } | { positive_score_impact, type } | { positive_score_impact, type } | { analyzer, index, index_options, max_shingle_size, norms, search_analyzer, search_quote_analyzer, similarity, term_vector, type } | { analyzer, boost, eager_global_ordinals, fielddata, fielddata_frequency_filter, index, index_options, index_phrases, index_prefixes, norms, position_increment_gap, search_analyzer, search_quote_analyzer, similarity, term_vector, type } | { type } | { type, null_value } | { boost, format, ignore_malformed, index, script, on_script_error, null_value, precision_step, type } | { boost, fielddata, format, ignore_malformed, index, script, on_script_error, null_value, precision_step, locale, type } | { type, default_metric, metrics, time_series_metric } | { type, dims, element_type, index, index_options, similarity } | { boost, depth_limit, doc_values, eager_global_ordinals, index, index_options, null_value, similarity, split_queries_on_whitespace, type } | { enabled, include_in_parent, include_in_root, type } | { enabled, subobjects, type } | { type, enabled, priority, time_series_dimension } | { type, meta, inference_id, search_inference_id } | { type } | { analyzer, contexts, max_input_length, preserve_position_increments, preserve_separators, search_analyzer, type } | { value, type } | { type, index } | { path, type } | { ignore_malformed, type } | { boost, index, ignore_malformed, null_value, on_script_error, script, time_series_dimension, type } | { type } | { analyzer, boost, index, null_value, enable_position_increments, type } | { ignore_malformed, ignore_z_value, null_value, index, on_script_error, script, type } | { coerce, ignore_malformed, ignore_z_value, index, orientation, strategy, type } | { ignore_malformed, ignore_z_value, null_value, type } | { coerce, ignore_malformed, ignore_z_value, orientation, type } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value } | { type, null_value, scaling_factor } | { type, null_value } | { type, null_value } | { format, type } | { type } | { type } | { type } | { type } | { type } | { type, norms, index_options, index, null_value, rules, language, country, variant, strength, decomposition, alternate, case_level, case_first, numeric, variable_top, hiragana_quaternary_mode }>)*: Mapping for a field. For new fields, this mapping can include:
- Field name
- Field data type
@@ -7970,7 +7971,7 @@ Perform chat completion inference
{ref}/chat-completion-inference-api.html[Endpoint documentation]
[source,ts]
----
client.inference.chatCompletionUnified({ inference_id, messages })
client.inference.chatCompletionUnified({ inference_id })
----
[discrete]
@@ -7978,14 +7979,7 @@ client.inference.chatCompletionUnified({ inference_id, messages })
* *Request (object):*
** *`inference_id` (string)*: The inference Id
** *`messages` ({ content, role, tool_call_id, tool_calls }[])*: A list of objects representing the conversation.
** *`model` (Optional, string)*: The ID of the model to use.
** *`max_completion_tokens` (Optional, number)*: The upper bound limit for the number of tokens that can be generated for a completion request.
** *`stop` (Optional, string[])*: A sequence of strings to control when the model should stop generating additional tokens.
** *`temperature` (Optional, float)*: The sampling temperature to use.
** *`tool_choice` (Optional, string | { type, function })*: Controls which tool is called by the model.
** *`tools` (Optional, { type, function }[])*: A list of tools that the model can call.
** *`top_p` (Optional, float)*: Nucleus sampling, an alternative to sampling with temperature.
** *`chat_completion_request` (Optional, { messages, model, max_completion_tokens, stop, temperature, tool_choice, tools, top_p })*
** *`timeout` (Optional, string | -1 | 0)*: Specifies the amount of time to wait for the inference request to complete.
[discrete]
@@ -8044,6 +8038,25 @@ client.inference.get({ ... })
** *`task_type` (Optional, Enum("sparse_embedding" | "text_embedding" | "rerank" | "completion" | "chat_completion"))*: The task type
** *`inference_id` (Optional, string)*: The inference Id
[discrete]
==== post_eis_chat_completion
Perform a chat completion task through the Elastic Inference Service (EIS).
Perform a chat completion inference task with the `elastic` service.
{ref}/post-inference-api.html[Endpoint documentation]
[source,ts]
----
client.inference.postEisChatCompletion({ eis_inference_id })
----
[discrete]
==== Arguments
* *Request (object):*
** *`eis_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`chat_completion_request` (Optional, { messages, model, max_completion_tokens, stop, temperature, tool_choice, tools, top_p })*
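A minimal usage sketch follows; the connection details, endpoint identifier, and message content below are placeholders rather than values from this commit:
[source,ts]
----
import { Client } from '@elastic/elasticsearch'

// Placeholder connection settings; adjust for your cluster.
const client = new Client({ node: 'https://localhost:9200', auth: { apiKey: '<api-key>' } })

// Placeholder EIS endpoint id; the request body is passed through `chat_completion_request`.
const response = await client.inference.postEisChatCompletion({
  eis_inference_id: 'my-eis-endpoint',
  chat_completion_request: {
    messages: [{ role: 'user', content: 'Say this is a test' }]
  }
})
----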
[discrete]
==== put
Create an inference endpoint.
@@ -8071,6 +8084,199 @@ client.inference.put({ inference_id })
** *`task_type` (Optional, Enum("sparse_embedding" | "text_embedding" | "rerank" | "completion" | "chat_completion"))*: The task type
** *`inference_config` (Optional, { chunking_settings, service, service_settings, task_settings })*
[discrete]
==== put_alibabacloud
Create an AlibabaCloud AI Search inference endpoint.
Create an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-alibabacloud-ai-search.html[Endpoint documentation]
[source,ts]
----
client.inference.putAlibabacloud({ task_type, alibabacloud_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion" | "rerank" | "space_embedding" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`alibabacloud_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("alibabacloud-ai-search"))*: The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.
** *`service_settings` ({ api_key, host, rate_limit, service_id, workspace })*: Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { input_type, return_token })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
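For illustration, a hedged sketch of creating a `text_embedding` endpoint with this helper; the identifier, key, host, service, and workspace values are placeholders, and an instantiated `client` is assumed:
[source,ts]
----
// Assumes `client` is an instantiated @elastic/elasticsearch Client; all values are placeholders.
await client.inference.putAlibabacloud({
  task_type: 'text_embedding',
  alibabacloud_inference_id: 'my-alibabacloud-endpoint',
  service: 'alibabacloud-ai-search',
  service_settings: {
    api_key: '<api-key>',
    host: '<host>',
    service_id: '<service-id>',
    workspace: '<workspace>'
  }
})
----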
[discrete]
==== put_amazonbedrock
Create an Amazon Bedrock inference endpoint.
Create an inference endpoint to perform an inference task with the `amazonbedrock` service.
> info
> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-amazon-bedrock.html[Endpoint documentation]
[source,ts]
----
client.inference.putAmazonbedrock({ task_type, amazonbedrock_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`amazonbedrock_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("amazonbedrock"))*: The type of service supported for the specified task type. In this case, `amazonbedrock`.
** *`service_settings` ({ access_key, model, provider, region, rate_limit, secret_key })*: Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { max_new_tokens, temperature, top_k, top_p })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
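As a rough sketch (not part of this commit), a `text_embedding` endpoint might be created as follows; the keys, region, provider, and model values are illustrative placeholders:
[source,ts]
----
// Assumes an instantiated `client`; credentials, provider, and model names are placeholders.
await client.inference.putAmazonbedrock({
  task_type: 'text_embedding',
  amazonbedrock_inference_id: 'my-bedrock-endpoint',
  service: 'amazonbedrock',
  service_settings: {
    access_key: '<aws-access-key>',
    secret_key: '<aws-secret-key>',
    region: '<aws-region>',
    provider: '<model-provider>',
    model: '<model-id>'
  }
})
----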
[discrete]
==== put_anthropic
Create an Anthropic inference endpoint.
Create an inference endpoint to perform an inference task with the `anthropic` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-anthropic.html[Endpoint documentation]
[source,ts]
----
client.inference.putAnthropic({ task_type, anthropic_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion"))*: The task type.
The only valid task type for the model to perform is `completion`.
** *`anthropic_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("anthropic"))*: The type of service supported for the specified task type. In this case, `anthropic`.
** *`service_settings` ({ api_key, model_id, rate_limit })*: Settings used to install the inference model. These settings are specific to the `anthropic` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { max_tokens, temperature, top_k, top_p })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
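A hedged example of a `completion` endpoint; the API key and model id are placeholders, and an existing `client` instance is assumed:
[source,ts]
----
// Placeholder Anthropic credentials and model id.
await client.inference.putAnthropic({
  task_type: 'completion',
  anthropic_inference_id: 'my-anthropic-endpoint',
  service: 'anthropic',
  service_settings: {
    api_key: '<anthropic-api-key>',
    model_id: '<anthropic-model-id>'
  },
  task_settings: { max_tokens: 1024 }
})
----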
[discrete]
==== put_azureaistudio
Create an Azure AI Studio inference endpoint.
Create an inference endpoint to perform an inference task with the `azureaistudio` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-azure-ai-studio.html[Endpoint documentation]
[source,ts]
----
client.inference.putAzureaistudio({ task_type, azureaistudio_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`azureaistudio_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("azureaistudio"))*: The type of service supported for the specified task type. In this case, `azureaistudio`.
** *`service_settings` ({ api_key, endpoint_type, target, provider, rate_limit })*: Settings used to install the inference model. These settings are specific to the `azureaistudio` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { do_sample, max_new_tokens, temperature, top_p, user })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
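A sketch of a `completion` endpoint with placeholder target, provider, and endpoint type values; consult the Azure AI Studio service documentation for the values your deployment needs:
[source,ts]
----
// Assumes an instantiated `client`; target, provider, and endpoint_type are placeholders.
await client.inference.putAzureaistudio({
  task_type: 'completion',
  azureaistudio_inference_id: 'my-azureaistudio-endpoint',
  service: 'azureaistudio',
  service_settings: {
    api_key: '<api-key>',
    target: '<deployment-target-url>',
    provider: '<model-provider>',
    endpoint_type: '<endpoint-type>'
  }
})
----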
[discrete]
==== put_azureopenai
Create an Azure OpenAI inference endpoint.
Create an inference endpoint to perform an inference task with the `azureopenai` service.
The list of chat completion models that you can choose from in your Azure OpenAI deployment includes:
* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)
* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)
The list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-azure-openai.html[Endpoint documentation]
[source,ts]
----
client.inference.putAzureopenai({ task_type, azureopenai_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion" | "text_embedding"))*: The type of the inference task that the model will perform.
NOTE: The `chat_completion` task type only supports streaming and only through the _stream API.
** *`azureopenai_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("azureopenai"))*: The type of service supported for the specified task type. In this case, `azureopenai`.
** *`service_settings` ({ api_key, api_version, deployment_id, entra_id, rate_limit, resource_name })*: Settings used to install the inference model. These settings are specific to the `azureopenai` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { user })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
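A minimal `text_embedding` sketch; the resource, deployment, and API-version values come from your own Azure OpenAI deployment, not from this commit:
[source,ts]
----
// Placeholder Azure OpenAI deployment details; assumes an instantiated `client`.
await client.inference.putAzureopenai({
  task_type: 'text_embedding',
  azureopenai_inference_id: 'my-azureopenai-endpoint',
  service: 'azureopenai',
  service_settings: {
    api_key: '<api-key>',
    resource_name: '<resource-name>',
    deployment_id: '<deployment-id>',
    api_version: '<api-version>'
  }
})
----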
[discrete]
==== put_cohere
Create a Cohere inference endpoint.
Create an inference endpoint to perform an inference task with the `cohere` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-cohere.html[Endpoint documentation]
[source,ts]
----
client.inference.putCohere({ task_type, cohere_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion" | "rerank" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`cohere_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("cohere"))*: The type of service supported for the specified task type. In this case, `cohere`.
** *`service_settings` ({ api_key, embedding_type, model_id, rate_limit, similarity })*: Settings used to install the inference model.
These settings are specific to the `cohere` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { input_type, return_documents, top_n, truncate })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
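Hedged sketch of a `text_embedding` endpoint; the key and model id are placeholders and a configured `client` is assumed:
[source,ts]
----
// Placeholder Cohere credentials and model id.
await client.inference.putCohere({
  task_type: 'text_embedding',
  cohere_inference_id: 'my-cohere-endpoint',
  service: 'cohere',
  service_settings: {
    api_key: '<cohere-api-key>',
    model_id: '<cohere-model-id>'
  }
})
----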
[discrete]
==== put_eis
Create an Elastic Inference Service (EIS) inference endpoint.
@@ -8094,21 +8300,244 @@ NOTE: The `chat_completion` task type only supports streaming and only through t
** *`service_settings` ({ model_id, rate_limit })*: Settings used to install the inference model. These settings are specific to the `elastic` service.
[discrete]
==== put_mistral
Configure a Mistral inference endpoint
==== put_elasticsearch
Create an Elasticsearch inference endpoint.
{ref}/infer-service-mistral.html[Endpoint documentation]
Create an inference endpoint to perform an inference task with the `elasticsearch` service.
> info
> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints; you only need to create the endpoints using the API if you want to customize the settings.
If you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.
> info
> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-elasticsearch.html[Endpoint documentation]
[source,ts]
----
client.inference.putMistral()
client.inference.putElasticsearch({ task_type, elasticsearch_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("rerank" | "sparse_embedding" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`elasticsearch_inference_id` (string)*: The unique identifier of the inference endpoint.
It must not match the `model_id`.
** *`service` (Enum("elasticsearch"))*: The type of service supported for the specified task type. In this case, `elasticsearch`.
** *`service_settings` ({ adaptive_allocations, deployment_id, model_id, num_allocations, num_threads })*: Settings used to install the inference model. These settings are specific to the `elasticsearch` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { return_documents })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
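As an illustrative sketch (the model id and allocation numbers are placeholders), a `text_embedding` endpoint backed by a locally deployed model could look like this:
[source,ts]
----
// Assumes an instantiated `client`; the model id is a placeholder.
await client.inference.putElasticsearch({
  task_type: 'text_embedding',
  elasticsearch_inference_id: 'my-elasticsearch-endpoint',
  service: 'elasticsearch',
  service_settings: {
    model_id: '<model-id>',
    num_allocations: 1,
    num_threads: 1
  }
})
----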
[discrete]
==== put_elser
Create an ELSER inference endpoint.
Create an inference endpoint to perform an inference task with the `elser` service.
You can also deploy ELSER by using the Elasticsearch inference integration.
> info
> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.
The API request will automatically download and deploy the ELSER model if it isn't already downloaded.
> info
> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-elser.html[Endpoint documentation]
[source,ts]
----
client.inference.putElser({ task_type, elser_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("sparse_embedding"))*: The type of the inference task that the model will perform.
** *`elser_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("elser"))*: The type of service supported for the specified task type. In this case, `elser`.
** *`service_settings` ({ adaptive_allocations, num_allocations, num_threads })*: Settings used to install the inference model. These settings are specific to the `elser` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
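A minimal sketch of a customized ELSER endpoint; the identifier and allocation settings are illustrative only:
[source,ts]
----
// Assumes an instantiated `client`.
await client.inference.putElser({
  task_type: 'sparse_embedding',
  elser_inference_id: 'my-elser-endpoint',
  service: 'elser',
  service_settings: {
    num_allocations: 1,
    num_threads: 1
  }
})
----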
[discrete]
==== put_googleaistudio
Create a Google AI Studio inference endpoint.
Create an inference endpoint to perform an inference task with the `googleaistudio` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-google-ai-studio.html[Endpoint documentation]
[source,ts]
----
client.inference.putGoogleaistudio({ task_type, googleaistudio_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("completion" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`googleaistudio_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("googleaistudio"))*: The type of service supported for the specified task type. In this case, `googleaistudio`.
** *`service_settings` ({ api_key, model_id, rate_limit })*: Settings used to install the inference model. These settings are specific to the `googleaistudio` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
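Sketch of a `completion` endpoint with placeholder credentials and model id, assuming an existing `client`:
[source,ts]
----
// Placeholder Google AI Studio credentials and model id.
await client.inference.putGoogleaistudio({
  task_type: 'completion',
  googleaistudio_inference_id: 'my-googleaistudio-endpoint',
  service: 'googleaistudio',
  service_settings: {
    api_key: '<api-key>',
    model_id: '<model-id>'
  }
})
----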
[discrete]
==== put_googlevertexai
Create a Google Vertex AI inference endpoint.
Create an inference endpoint to perform an inference task with the `googlevertexai` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-google-vertex-ai.html[Endpoint documentation]
[source,ts]
----
client.inference.putGooglevertexai({ task_type, googlevertexai_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("rerank" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`googlevertexai_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("googlevertexai"))*: The type of service supported for the specified task type. In this case, `googlevertexai`.
** *`service_settings` ({ location, model_id, project_id, rate_limit, service_account_json })*: Settings used to install the inference model. These settings are specific to the `googlevertexai` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { auto_truncate, top_n })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
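A hedged `text_embedding` sketch; the project, location, model, and service-account values come from your own Google Cloud setup:
[source,ts]
----
// Placeholder Google Cloud project details; assumes an instantiated `client`.
await client.inference.putGooglevertexai({
  task_type: 'text_embedding',
  googlevertexai_inference_id: 'my-googlevertexai-endpoint',
  service: 'googlevertexai',
  service_settings: {
    project_id: '<gcp-project-id>',
    location: '<gcp-region>',
    model_id: '<model-id>',
    service_account_json: '<service-account-json>'
  }
})
----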
[discrete]
==== put_hugging_face
Create a Hugging Face inference endpoint.
Create an inference endpoint to perform an inference task with the `hugging_face` service.
You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.
Select the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.
Create the endpoint and copy the URL after the endpoint initialization has finished.
The following models are recommended for the Hugging Face service:
* `all-MiniLM-L6-v2`
* `all-MiniLM-L12-v2`
* `all-mpnet-base-v2`
* `e5-base-v2`
* `e5-small-v2`
* `multilingual-e5-base`
* `multilingual-e5-small`
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-hugging-face.html[Endpoint documentation]
[source,ts]
----
client.inference.putHuggingFace({ task_type, huggingface_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("text_embedding"))*: The type of the inference task that the model will perform.
** *`huggingface_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("hugging_face"))*: The type of service supported for the specified task type. In this case, `hugging_face`.
** *`service_settings` ({ api_key, rate_limit, url })*: Settings used to install the inference model. These settings are specific to the `hugging_face` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
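Illustrative sketch only; the access token and endpoint URL come from your Hugging Face endpoint page, and a configured `client` is assumed:
[source,ts]
----
// Placeholder Hugging Face endpoint URL and access token.
await client.inference.putHuggingFace({
  task_type: 'text_embedding',
  huggingface_inference_id: 'my-hugging-face-endpoint',
  service: 'hugging_face',
  service_settings: {
    api_key: '<hugging-face-access-token>',
    url: '<endpoint-url>'
  }
})
----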
[discrete]
==== put_jinaai
Create a JinaAI inference endpoint.
Create an inference endpoint to perform an inference task with the `jinaai` service.
To review the available `rerank` models, refer to <https://jina.ai/reranker>.
To review the available `text_embedding` models, refer to <https://jina.ai/embeddings/>.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
{ref}/infer-service-jinaai.html[Endpoint documentation]
[source,ts]
----
client.inference.putJinaai({ task_type, jinaai_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("rerank" | "text_embedding"))*: The type of the inference task that the model will perform.
** *`jinaai_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("jinaai"))*: The type of service supported for the specified task type. In this case, `jinaai`.
** *`service_settings` ({ api_key, model_id, rate_limit, similarity })*: Settings used to install the inference model. These settings are specific to the `jinaai` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { return_documents, task, top_n })*: Settings to configure the inference task.
These settings are specific to the task type you specified.
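A hedged `rerank` sketch with placeholder key and model id, assuming an existing `client` instance:
[source,ts]
----
// Placeholder JinaAI credentials and model id.
await client.inference.putJinaai({
  task_type: 'rerank',
  jinaai_inference_id: 'my-jinaai-endpoint',
  service: 'jinaai',
  service_settings: {
    api_key: '<jinaai-api-key>',
    model_id: '<jinaai-model-id>'
  },
  task_settings: { top_n: 10, return_documents: true }
})
----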
[discrete]
==== put_mistral
Create a Mistral inference endpoint.
Create an inference endpoint to perform an inference task with the `mistral` service.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
To verify the deployment status, use the get trained model statistics API.
Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
[source,ts]
----
client.inference.putMistral({ task_type, mistral_inference_id, service, service_settings })
----
[discrete]
==== Arguments
* *Request (object):*
** *`task_type` (Enum("text_embedding"))*: The task type.
The only valid task type for the model to perform is `text_embedding`.
** *`mistral_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("mistral"))*: The type of service supported for the specified task type. In this case, `mistral`.
** *`service_settings` ({ api_key, max_input_tokens, model, rate_limit })*: Settings used to install the inference model. These settings are specific to the `mistral` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
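Finally, a minimal `text_embedding` sketch; the key and model name are placeholders, not values defined in this commit:
[source,ts]
----
// Placeholder Mistral credentials and model name; assumes an instantiated `client`.
await client.inference.putMistral({
  task_type: 'text_embedding',
  mistral_inference_id: 'my-mistral-endpoint',
  service: 'mistral',
  service_settings: {
    api_key: '<mistral-api-key>',
    model: '<mistral-model>'
  }
})
----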
[discrete]
==== put_openai
Create an OpenAI inference endpoint.
Create an inference endpoint to perform an inference task with the `openai` service.
Create an inference endpoint to perform an inference task with the `openai` service or `openai` compatible APIs.
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
After creating the endpoint, wait for the model deployment to complete before using it.
@@ -8129,7 +8558,7 @@ client.inference.putOpenai({ task_type, openai_inference_id, service, service_se
** *`task_type` (Enum("chat_completion" | "completion" | "text_embedding"))*: The type of the inference task that the model will perform.
NOTE: The `chat_completion` task type only supports streaming and only through the _stream API.
** *`openai_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("elastic"))*: The type of service supported for the specified task type. In this case, `openai`.
** *`service` (Enum("openai"))*: The type of service supported for the specified task type. In this case, `openai`.
** *`service_settings` ({ api_key, dimensions, model_id, organization_id, rate_limit, url })*: Settings used to install the inference model. These settings are specific to the `openai` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { user })*: Settings to configure the inference task.
@@ -8155,7 +8584,7 @@ client.inference.putVoyageai({ task_type, voyageai_inference_id, service, servic
* *Request (object):*
** *`task_type` (Enum("text_embedding" | "rerank"))*: The type of the inference task that the model will perform.
** *`voyageai_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("elastic"))*: The type of service supported for the specified task type. In this case, `voyageai`.
** *`service` (Enum("voyageai"))*: The type of service supported for the specified task type. In this case, `voyageai`.
** *`service_settings` ({ dimensions, model_id, rate_limit, embedding_type })*: Settings used to install the inference model. These settings are specific to the `voyageai` service.
** *`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, strategy })*: The chunking configuration object.
** *`task_settings` (Optional, { input_type, return_documents, top_k, truncation })*: Settings to configure the inference task.
@@ -8188,7 +8617,7 @@ client.inference.putWatsonx({ task_type, watsonx_inference_id, service, service_
** *`task_type` (Enum("text_embedding"))*: The task type.
The only valid task type for the model to perform is `text_embedding`.
** *`watsonx_inference_id` (string)*: The unique identifier of the inference endpoint.
** *`service` (Enum("elastic"))*: The type of service supported for the specified task type. In this case, `watsonxai`.
** *`service` (Enum("watsonxai"))*: The type of service supported for the specified task type. In this case, `watsonxai`.
** *`service_settings` ({ api_key, api_version, model_id, project_id, rate_limit, url })*: Settings used to install the inference model. These settings are specific to the `watsonxai` service.
[discrete]
@@ -10305,7 +10734,7 @@ specified.
** *`definition` (Optional, { preprocessors, trained_model })*: The inference definition for the model. If definition is specified, then
compressed_definition cannot be specified.
** *`description` (Optional, string)*: A human-readable description of the inference trained model.
** *`inference_config` (Optional, { regression, classification, text_classification, zero_shot_classification, fill_mask, ner, pass_through, text_embedding, text_expansion, question_answering })*: The default configuration for inference. This can be either a regression
** *`inference_config` (Optional, { regression, classification, text_classification, zero_shot_classification, fill_mask, learning_to_rank, ner, pass_through, text_embedding, text_expansion, question_answering })*: The default configuration for inference. This can be either a regression
or classification configuration. It must match the underlying
definition.trained_model's target_type. For pre-packaged models such as
ELSER the config is not required.
@@ -15986,7 +16415,10 @@ To indicate that the request should never timeout, set it to `-1`.
Update Watcher index settings.
Update settings for the Watcher internal index (`.watches`).
Only a subset of settings can be modified.
This includes `index.auto_expand_replicas` and `index.number_of_replicas`.
This includes `index.auto_expand_replicas`, `index.number_of_replicas`, `index.routing.allocation.exclude.*`,
`index.routing.allocation.include.*` and `index.routing.allocation.require.*`.
Modification of `index.routing.allocation.include._tier_preference` is an exception and is not allowed as the
Watcher shards must always be in the `data_content` tier.
{ref}/watcher-api-update-settings.html[Endpoint documentation]
[source,ts]

View File

@@ -53,22 +53,15 @@ export default class Inference {
async chatCompletionUnified (this: That, params: T.InferenceChatCompletionUnifiedRequest | TB.InferenceChatCompletionUnifiedRequest, options?: TransportRequestOptions): Promise<T.InferenceChatCompletionUnifiedResponse>
async chatCompletionUnified (this: That, params: T.InferenceChatCompletionUnifiedRequest | TB.InferenceChatCompletionUnifiedRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['inference_id']
const acceptedBody: string[] = ['messages', 'model', 'max_completion_tokens', 'stop', 'temperature', 'tool_choice', 'tools', 'top_p']
const acceptedBody: string[] = ['chat_completion_request']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
let body: any = params.body ?? undefined
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
body = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
@@ -216,6 +209,43 @@ export default class Inference {
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
/**
* Perform a chat completion task through the Elastic Inference Service (EIS). Perform a chat completion inference task with the `elastic` service.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html | Elasticsearch API documentation}
*/
async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest | TB.InferencePostEisChatCompletionRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePostEisChatCompletionResponse>
async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest | TB.InferencePostEisChatCompletionRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePostEisChatCompletionResponse, unknown>>
async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest | TB.InferencePostEisChatCompletionRequest, options?: TransportRequestOptions): Promise<T.InferencePostEisChatCompletionResponse>
async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest | TB.InferencePostEisChatCompletionRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['eis_inference_id']
const acceptedBody: string[] = ['chat_completion_request']
const querystring: Record<string, any> = {}
// @ts-expect-error
let body: any = params.body ?? undefined
for (const key in params) {
if (acceptedBody.includes(key)) {
// @ts-expect-error
body = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'POST'
const path = `/_inference/chat_completion/${encodeURIComponent(params.eis_inference_id.toString())}/_stream`
const meta: TransportRequestMetadata = {
name: 'inference.post_eis_chat_completion',
pathParts: {
eis_inference_id: params.eis_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
/**
* Create an inference endpoint. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html | Elasticsearch API documentation}
@@ -261,6 +291,276 @@ export default class Inference {
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
/**
* Create an AlibabaCloud AI Search inference endpoint. Create an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html | Elasticsearch API documentation}
*/
async putAlibabacloud (this: That, params: T.InferencePutAlibabacloudRequest | TB.InferencePutAlibabacloudRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutAlibabacloudResponse>
async putAlibabacloud (this: That, params: T.InferencePutAlibabacloudRequest | TB.InferencePutAlibabacloudRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutAlibabacloudResponse, unknown>>
async putAlibabacloud (this: That, params: T.InferencePutAlibabacloudRequest | TB.InferencePutAlibabacloudRequest, options?: TransportRequestOptions): Promise<T.InferencePutAlibabacloudResponse>
async putAlibabacloud (this: That, params: T.InferencePutAlibabacloudRequest | TB.InferencePutAlibabacloudRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'alibabacloud_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.alibabacloud_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_alibabacloud',
pathParts: {
task_type: params.task_type,
alibabacloud_inference_id: params.alibabacloud_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
/**
* Create an Amazon Bedrock inference endpoint. Creates an inference endpoint to perform an inference task with the `amazonbedrock` service. >info > You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html | Elasticsearch API documentation}
*/
async putAmazonbedrock (this: That, params: T.InferencePutAmazonbedrockRequest | TB.InferencePutAmazonbedrockRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutAmazonbedrockResponse>
async putAmazonbedrock (this: That, params: T.InferencePutAmazonbedrockRequest | TB.InferencePutAmazonbedrockRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutAmazonbedrockResponse, unknown>>
async putAmazonbedrock (this: That, params: T.InferencePutAmazonbedrockRequest | TB.InferencePutAmazonbedrockRequest, options?: TransportRequestOptions): Promise<T.InferencePutAmazonbedrockResponse>
async putAmazonbedrock (this: That, params: T.InferencePutAmazonbedrockRequest | TB.InferencePutAmazonbedrockRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'amazonbedrock_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.amazonbedrock_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_amazonbedrock',
pathParts: {
task_type: params.task_type,
amazonbedrock_inference_id: params.amazonbedrock_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
/**
* Create an Anthropic inference endpoint. Create an inference endpoint to perform an inference task with the `anthropic` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html | Elasticsearch API documentation}
*/
async putAnthropic (this: That, params: T.InferencePutAnthropicRequest | TB.InferencePutAnthropicRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutAnthropicResponse>
async putAnthropic (this: That, params: T.InferencePutAnthropicRequest | TB.InferencePutAnthropicRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutAnthropicResponse, unknown>>
async putAnthropic (this: That, params: T.InferencePutAnthropicRequest | TB.InferencePutAnthropicRequest, options?: TransportRequestOptions): Promise<T.InferencePutAnthropicResponse>
async putAnthropic (this: That, params: T.InferencePutAnthropicRequest | TB.InferencePutAnthropicRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'anthropic_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.anthropic_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_anthropic',
pathParts: {
task_type: params.task_type,
anthropic_inference_id: params.anthropic_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
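// Usage sketch for putAnthropic (illustrative; `client`, the endpoint ID and all credential and
// model values are placeholders, fields follow InferencePutAnthropicRequest):
//
//   await client.inference.putAnthropic({
//     task_type: 'completion',
//     anthropic_inference_id: 'my-anthropic-endpoint',
//     service: 'anthropic',
//     service_settings: { api_key: '<anthropic-api-key>', model_id: '<anthropic-model>' },
//     task_settings: { max_tokens: 1024 }
//   })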
/**
* Create an Azure AI studio inference endpoint. Create an inference endpoint to perform an inference task with the `azureaistudio` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html | Elasticsearch API documentation}
*/
async putAzureaistudio (this: That, params: T.InferencePutAzureaistudioRequest | TB.InferencePutAzureaistudioRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutAzureaistudioResponse>
async putAzureaistudio (this: That, params: T.InferencePutAzureaistudioRequest | TB.InferencePutAzureaistudioRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutAzureaistudioResponse, unknown>>
async putAzureaistudio (this: That, params: T.InferencePutAzureaistudioRequest | TB.InferencePutAzureaistudioRequest, options?: TransportRequestOptions): Promise<T.InferencePutAzureaistudioResponse>
async putAzureaistudio (this: That, params: T.InferencePutAzureaistudioRequest | TB.InferencePutAzureaistudioRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'azureaistudio_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.azureaistudio_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_azureaistudio',
pathParts: {
task_type: params.task_type,
azureaistudio_inference_id: params.azureaistudio_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
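// Usage sketch for putAzureaistudio (illustrative; all values are placeholders and follow
// InferencePutAzureaistudioRequest / InferenceAzureAiStudioServiceSettings):
//
//   await client.inference.putAzureaistudio({
//     task_type: 'completion',
//     azureaistudio_inference_id: 'my-azure-ai-studio-endpoint',
//     service: 'azureaistudio',
//     service_settings: {
//       api_key: '<api-key>',
//       target: '<deployment-target-url>',
//       provider: '<model-provider>',
//       endpoint_type: '<endpoint-type>'
//     }
//   })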
/**
* Create an Azure OpenAI inference endpoint. Create an inference endpoint to perform an inference task with the `azureopenai` service. The list of chat completion models that you can choose from in your Azure OpenAI deployment includes: * [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models) * [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35) The list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings). When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html | Elasticsearch API documentation}
*/
async putAzureopenai (this: That, params: T.InferencePutAzureopenaiRequest | TB.InferencePutAzureopenaiRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutAzureopenaiResponse>
async putAzureopenai (this: That, params: T.InferencePutAzureopenaiRequest | TB.InferencePutAzureopenaiRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutAzureopenaiResponse, unknown>>
async putAzureopenai (this: That, params: T.InferencePutAzureopenaiRequest | TB.InferencePutAzureopenaiRequest, options?: TransportRequestOptions): Promise<T.InferencePutAzureopenaiResponse>
async putAzureopenai (this: That, params: T.InferencePutAzureopenaiRequest | TB.InferencePutAzureopenaiRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'azureopenai_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.azureopenai_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_azureopenai',
pathParts: {
task_type: params.task_type,
azureopenai_inference_id: params.azureopenai_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
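// Usage sketch for putAzureopenai (illustrative; all values are placeholders and follow
// InferencePutAzureopenaiRequest / InferenceAzureOpenAIServiceSettings):
//
//   await client.inference.putAzureopenai({
//     task_type: 'text_embedding',
//     azureopenai_inference_id: 'my-azure-openai-endpoint',
//     service: 'azureopenai',
//     service_settings: {
//       api_key: '<api-key>',
//       api_version: '<api-version>',
//       deployment_id: '<deployment-id>',
//       resource_name: '<resource-name>'
//     }
//   })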
/**
* Create a Cohere inference endpoint. Create an inference endpoint to perform an inference task with the `cohere` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html | Elasticsearch API documentation}
*/
async putCohere (this: That, params: T.InferencePutCohereRequest | TB.InferencePutCohereRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutCohereResponse>
async putCohere (this: That, params: T.InferencePutCohereRequest | TB.InferencePutCohereRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutCohereResponse, unknown>>
async putCohere (this: That, params: T.InferencePutCohereRequest | TB.InferencePutCohereRequest, options?: TransportRequestOptions): Promise<T.InferencePutCohereResponse>
async putCohere (this: That, params: T.InferencePutCohereRequest | TB.InferencePutCohereRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'cohere_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.cohere_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_cohere',
pathParts: {
task_type: params.task_type,
cohere_inference_id: params.cohere_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
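// Usage sketch for putCohere (illustrative; all values are placeholders and follow
// InferencePutCohereRequest / InferenceCohereServiceSettings):
//
//   await client.inference.putCohere({
//     task_type: 'text_embedding',
//     cohere_inference_id: 'my-cohere-endpoint',
//     service: 'cohere',
//     service_settings: { api_key: '<cohere-api-key>', model_id: '<cohere-model>' }
//   })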
/**
* Create an Elastic Inference Service (EIS) inference endpoint. Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elastic.html | Elasticsearch API documentation}
@ -307,22 +607,304 @@ export default class Inference {
}
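// Usage sketch for the putEis endpoint documented above (illustrative; all values are
// placeholders and follow InferencePutEisRequest / InferenceEisServiceSettings):
//
//   await client.inference.putEis({
//     task_type: 'chat_completion',
//     eis_inference_id: 'my-eis-endpoint',
//     service: 'elastic',
//     service_settings: { model_id: '<eis-model-id>' }
//   })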
/**
* Create an Elasticsearch inference endpoint. Create an inference endpoint to perform an inference task with the `elasticsearch` service. > info > Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints; you only need to create the endpoints using the API if you want to customize the settings. If you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet. > info > You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html | Elasticsearch API documentation}
*/
async putElasticsearch (this: That, params: T.InferencePutElasticsearchRequest | TB.InferencePutElasticsearchRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutElasticsearchResponse>
async putElasticsearch (this: That, params: T.InferencePutElasticsearchRequest | TB.InferencePutElasticsearchRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutElasticsearchResponse, unknown>>
async putElasticsearch (this: That, params: T.InferencePutElasticsearchRequest | TB.InferencePutElasticsearchRequest, options?: TransportRequestOptions): Promise<T.InferencePutElasticsearchResponse>
async putElasticsearch (this: That, params: T.InferencePutElasticsearchRequest | TB.InferencePutElasticsearchRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'elasticsearch_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.elasticsearch_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_elasticsearch',
pathParts: {
task_type: params.task_type,
elasticsearch_inference_id: params.elasticsearch_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
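// Usage sketch for putElasticsearch (illustrative; `<model-id>` stands for a built-in or
// uploaded model such as ELSER or E5, other values are placeholders, fields follow
// InferencePutElasticsearchRequest / InferenceElasticsearchServiceSettings):
//
//   await client.inference.putElasticsearch({
//     task_type: 'text_embedding',
//     elasticsearch_inference_id: 'my-elasticsearch-endpoint',
//     service: 'elasticsearch',
//     service_settings: { model_id: '<model-id>', num_threads: 1, num_allocations: 1 }
//   })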
/**
* Create an ELSER inference endpoint. Create an inference endpoint to perform an inference task with the `elser` service. You can also deploy ELSER by using the Elasticsearch inference integration. > info > Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings. The API request will automatically download and deploy the ELSER model if it isn't already downloaded. > info > You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elser.html | Elasticsearch API documentation}
*/
async putElser (this: That, params: T.InferencePutElserRequest | TB.InferencePutElserRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutElserResponse>
async putElser (this: That, params: T.InferencePutElserRequest | TB.InferencePutElserRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutElserResponse, unknown>>
async putElser (this: That, params: T.InferencePutElserRequest | TB.InferencePutElserRequest, options?: TransportRequestOptions): Promise<T.InferencePutElserResponse>
async putElser (this: That, params: T.InferencePutElserRequest | TB.InferencePutElserRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'elser_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.elser_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_elser',
pathParts: {
task_type: params.task_type,
elser_inference_id: params.elser_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
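// Usage sketch for putElser (illustrative; the endpoint ID is a placeholder, fields follow
// InferencePutElserRequest / InferenceElserServiceSettings):
//
//   await client.inference.putElser({
//     task_type: 'sparse_embedding',
//     elser_inference_id: 'my-elser-endpoint',
//     service: 'elser',
//     service_settings: { num_allocations: 1, num_threads: 1 }
//   })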
/**
* Create a Google AI Studio inference endpoint. Create an inference endpoint to perform an inference task with the `googleaistudio` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-ai-studio.html | Elasticsearch API documentation}
*/
async putGoogleaistudio (this: That, params: T.InferencePutGoogleaistudioRequest | TB.InferencePutGoogleaistudioRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutGoogleaistudioResponse>
async putGoogleaistudio (this: That, params: T.InferencePutGoogleaistudioRequest | TB.InferencePutGoogleaistudioRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutGoogleaistudioResponse, unknown>>
async putGoogleaistudio (this: That, params: T.InferencePutGoogleaistudioRequest | TB.InferencePutGoogleaistudioRequest, options?: TransportRequestOptions): Promise<T.InferencePutGoogleaistudioResponse>
async putGoogleaistudio (this: That, params: T.InferencePutGoogleaistudioRequest | TB.InferencePutGoogleaistudioRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'googleaistudio_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.googleaistudio_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_googleaistudio',
pathParts: {
task_type: params.task_type,
googleaistudio_inference_id: params.googleaistudio_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
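// Usage sketch for putGoogleaistudio (illustrative; all values are placeholders and follow
// InferencePutGoogleaistudioRequest / InferenceGoogleAiStudioServiceSettings):
//
//   await client.inference.putGoogleaistudio({
//     task_type: 'completion',
//     googleaistudio_inference_id: 'my-googleaistudio-endpoint',
//     service: 'googleaistudio',
//     service_settings: { api_key: '<api-key>', model_id: '<model-id>' }
//   })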
/**
* Create a Google Vertex AI inference endpoint. Create an inference endpoint to perform an inference task with the `googlevertexai` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-vertex-ai.html | Elasticsearch API documentation}
*/
async putGooglevertexai (this: That, params: T.InferencePutGooglevertexaiRequest | TB.InferencePutGooglevertexaiRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutGooglevertexaiResponse>
async putGooglevertexai (this: That, params: T.InferencePutGooglevertexaiRequest | TB.InferencePutGooglevertexaiRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutGooglevertexaiResponse, unknown>>
async putGooglevertexai (this: That, params: T.InferencePutGooglevertexaiRequest | TB.InferencePutGooglevertexaiRequest, options?: TransportRequestOptions): Promise<T.InferencePutGooglevertexaiResponse>
async putGooglevertexai (this: That, params: T.InferencePutGooglevertexaiRequest | TB.InferencePutGooglevertexaiRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'googlevertexai_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.googlevertexai_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_googlevertexai',
pathParts: {
task_type: params.task_type,
googlevertexai_inference_id: params.googlevertexai_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
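// Usage sketch for putGooglevertexai (illustrative; all values are placeholders and follow
// InferencePutGooglevertexaiRequest / InferenceGoogleVertexAIServiceSettings):
//
//   await client.inference.putGooglevertexai({
//     task_type: 'text_embedding',
//     googlevertexai_inference_id: 'my-googlevertexai-endpoint',
//     service: 'googlevertexai',
//     service_settings: {
//       location: '<gcp-region>',
//       project_id: '<gcp-project>',
//       model_id: '<model-id>',
//       service_account_json: '<service-account-json>'
//     }
//   })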
/**
* Create a Hugging Face inference endpoint. Create an inference endpoint to perform an inference task with the `hugging_face` service. You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. Select the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section. Create the endpoint and copy the URL after the endpoint initialization has been finished. The following models are recommended for the Hugging Face service: * `all-MiniLM-L6-v2` * `all-MiniLM-L12-v2` * `all-mpnet-base-v2` * `e5-base-v2` * `e5-small-v2` * `multilingual-e5-base` * `multilingual-e5-small` When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-hugging-face.html | Elasticsearch API documentation}
*/
async putHuggingFace (this: That, params: T.InferencePutHuggingFaceRequest | TB.InferencePutHuggingFaceRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutHuggingFaceResponse>
async putHuggingFace (this: That, params: T.InferencePutHuggingFaceRequest | TB.InferencePutHuggingFaceRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutHuggingFaceResponse, unknown>>
async putHuggingFace (this: That, params: T.InferencePutHuggingFaceRequest | TB.InferencePutHuggingFaceRequest, options?: TransportRequestOptions): Promise<T.InferencePutHuggingFaceResponse>
async putHuggingFace (this: That, params: T.InferencePutHuggingFaceRequest | TB.InferencePutHuggingFaceRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'huggingface_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.huggingface_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_hugging_face',
pathParts: {
task_type: params.task_type,
huggingface_inference_id: params.huggingface_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
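// Usage sketch for putHuggingFace (illustrative; all values are placeholders and follow
// InferencePutHuggingFaceRequest / InferenceHuggingFaceServiceSettings):
//
//   await client.inference.putHuggingFace({
//     task_type: 'text_embedding',
//     huggingface_inference_id: 'my-hugging-face-endpoint',
//     service: 'hugging_face',
//     service_settings: { api_key: '<hf-api-key>', url: '<hf-endpoint-url>' }
//   })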
/**
* Create a JinaAI inference endpoint. Create an inference endpoint to perform an inference task with the `jinaai` service. To review the available `rerank` models, refer to <https://jina.ai/reranker>. To review the available `text_embedding` models, refer to <https://jina.ai/embeddings/>. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-jinaai.html | Elasticsearch API documentation}
*/
async putJinaai (this: That, params: T.InferencePutJinaaiRequest | TB.InferencePutJinaaiRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutJinaaiResponse>
async putJinaai (this: That, params: T.InferencePutJinaaiRequest | TB.InferencePutJinaaiRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutJinaaiResponse, unknown>>
async putJinaai (this: That, params: T.InferencePutJinaaiRequest | TB.InferencePutJinaaiRequest, options?: TransportRequestOptions): Promise<T.InferencePutJinaaiResponse>
async putJinaai (this: That, params: T.InferencePutJinaaiRequest | TB.InferencePutJinaaiRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'jinaai_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings', 'task_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
const method = 'PUT'
const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.jinaai_inference_id.toString())}`
const meta: TransportRequestMetadata = {
name: 'inference.put_jinaai',
pathParts: {
task_type: params.task_type,
jinaai_inference_id: params.jinaai_inference_id
}
}
return await this.transport.request({ path, method, querystring, body, meta }, options)
}
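// Usage sketch for putJinaai (illustrative; all values are placeholders and follow
// InferencePutJinaaiRequest / InferenceJinaAIServiceSettings):
//
//   await client.inference.putJinaai({
//     task_type: 'text_embedding',
//     jinaai_inference_id: 'my-jinaai-endpoint',
//     service: 'jinaai',
//     service_settings: { api_key: '<jina-api-key>', model_id: '<jina-model>' }
//   })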
/**
* Create a Mistral inference endpoint. Create an inference endpoint to perform an inference task with the `mistral` service. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-mistral.html | Elasticsearch API documentation}
*/
async putMistral (this: That, params: T.InferencePutMistralRequest | TB.InferencePutMistralRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutMistralResponse>
async putMistral (this: That, params: T.InferencePutMistralRequest | TB.InferencePutMistralRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.InferencePutMistralResponse, unknown>>
async putMistral (this: That, params: T.InferencePutMistralRequest | TB.InferencePutMistralRequest, options?: TransportRequestOptions): Promise<T.InferencePutMistralResponse>
async putMistral (this: That, params: T.InferencePutMistralRequest | TB.InferencePutMistralRequest, options?: TransportRequestOptions): Promise<any> {
const acceptedPath: string[] = ['task_type', 'mistral_inference_id']
const acceptedBody: string[] = ['chunking_settings', 'service', 'service_settings']
const querystring: Record<string, any> = {}
// @ts-expect-error
const userBody: any = params?.body
let body: Record<string, any> | string
if (typeof userBody === 'string') {
body = userBody
} else {
body = userBody != null ? { ...userBody } : undefined
}
for (const key in params) {
if (acceptedBody.includes(key)) {
body = body ?? {}
// @ts-expect-error
body[key] = params[key]
} else if (acceptedPath.includes(key)) {
continue
} else if (key !== 'body') {
// @ts-expect-error
querystring[key] = params[key]
}
}
@ -340,7 +922,7 @@ export default class Inference {
}
/**
* Create an OpenAI inference endpoint. Create an inference endpoint to perform an inference task with the `openai` service or `openai` compatible APIs. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html | Elasticsearch API documentation}
*/
async putOpenai (this: That, params: T.InferencePutOpenaiRequest | TB.InferencePutOpenaiRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.InferencePutOpenaiResponse>
@ -481,7 +481,7 @@ export default class Watcher {
}
/**
* Update Watcher index settings. Update settings for the Watcher internal index (`.watches`). Only a subset of settings can be modified. This includes `index.auto_expand_replicas`, `index.number_of_replicas`, `index.routing.allocation.exclude.*`, `index.routing.allocation.include.*` and `index.routing.allocation.require.*`. Modification of `index.routing.allocation.include._tier_preference` is an exception and is not allowed as the Watcher shards must always be in the `data_content` tier.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/watcher-api-update-settings.html | Elasticsearch API documentation}
*/
async updateSettings (this: That, params?: T.WatcherUpdateSettingsRequest | TB.WatcherUpdateSettingsRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.WatcherUpdateSettingsResponse>
@ -760,7 +760,7 @@ export interface MsearchMultisearchBody {
knn?: KnnSearch | KnnSearch[]
from?: integer
highlight?: SearchHighlight
indices_boost?: Partial<Record<IndexName, double>>[]
min_score?: double
post_filter?: QueryDslQueryContainer
profile?: boolean
@ -898,6 +898,7 @@ export interface OpenPointInTimeRequest extends RequestBase {
routing?: Routing
expand_wildcards?: ExpandWildcards
allow_partial_search_results?: boolean
max_concurrent_shard_requests?: integer
index_filter?: QueryDslQueryContainer
}
@ -1198,7 +1199,7 @@ export interface SearchRequest extends RequestBase {
from?: integer
highlight?: SearchHighlight
track_total_hits?: SearchTrackHits
indices_boost?: Partial<Record<IndexName, double>>[]
docvalue_fields?: (QueryDslFieldAndFormat | Field)[]
knn?: KnnSearch | KnnSearch[]
rank?: RankContainer
@ -5305,6 +5306,10 @@ export interface MappingBooleanProperty extends MappingDocValuesPropertyBase {
fielddata?: IndicesNumericFielddata
index?: boolean
null_value?: boolean
ignore_malformed?: boolean
script?: Script | string
on_script_error?: MappingOnScriptError
time_series_dimension?: boolean
type: 'boolean'
}
@ -5386,7 +5391,7 @@ export interface MappingDenseVectorIndexOptions {
type: MappingDenseVectorIndexOptionsType
}
export type MappingDenseVectorIndexOptionsType = 'bbq_flat' | 'bbq_hnsw' | 'flat' | 'hnsw' | 'int4_flat' | 'int4_hnsw' | 'int8_flat' | 'int8_hnsw'
export interface MappingDenseVectorProperty extends MappingPropertyBase {
type: 'dense_vector'
@ -6761,7 +6766,7 @@ export interface AsyncSearchSubmitRequest extends RequestBase {
from?: integer
highlight?: SearchHighlight
track_total_hits?: SearchTrackHits
indices_boost?: Partial<Record<IndexName, double>>[]
docvalue_fields?: (QueryDslFieldAndFormat | Field)[]
knn?: KnnSearch | KnnSearch[]
min_score?: double
@ -10252,10 +10257,11 @@ export interface EnrichDeletePolicyRequest extends RequestBase {
export type EnrichDeletePolicyResponse = AcknowledgedResponseBase
export type EnrichExecutePolicyEnrichPolicyPhase = 'SCHEDULED' | 'RUNNING' | 'COMPLETE' | 'FAILED' | 'CANCELLED'
export interface EnrichExecutePolicyExecuteEnrichPolicyStatus {
phase: EnrichExecutePolicyEnrichPolicyPhase
step?: string
}
export interface EnrichExecutePolicyRequest extends RequestBase {
@ -10266,7 +10272,7 @@ export interface EnrichExecutePolicyRequest extends RequestBase {
export interface EnrichExecutePolicyResponse {
status?: EnrichExecutePolicyExecuteEnrichPolicyStatus
task?: TaskId
}
export interface EnrichGetPolicyRequest extends RequestBase {
@ -10579,7 +10585,7 @@ export interface FleetSearchRequest extends RequestBase {
from?: integer
highlight?: SearchHighlight
track_total_hits?: SearchTrackHits
indices_boost?: Partial<Record<IndexName, double>>[]
docvalue_fields?: (QueryDslFieldAndFormat | Field)[]
min_score?: double
post_filter?: QueryDslQueryContainer
@ -12840,6 +12846,130 @@ export interface IndicesValidateQueryResponse {
error?: string
}
export interface InferenceAdaptiveAllocations {
enabled?: boolean
max_number_of_allocations?: integer
min_number_of_allocations?: integer
}
export interface InferenceAlibabaCloudServiceSettings {
api_key: string
host: string
rate_limit?: InferenceRateLimitSetting
service_id: string
workspace: string
}
export type InferenceAlibabaCloudServiceType = 'alibabacloud-ai-search'
export interface InferenceAlibabaCloudTaskSettings {
input_type?: string
return_token?: boolean
}
export type InferenceAlibabaCloudTaskType = 'completion' | 'rerank' | 'space_embedding' | 'text_embedding'
export interface InferenceAmazonBedrockServiceSettings {
access_key: string
model: string
provider?: string
region: string
rate_limit?: InferenceRateLimitSetting
secret_key: string
}
export type InferenceAmazonBedrockServiceType = 'amazonbedrock'
export interface InferenceAmazonBedrockTaskSettings {
max_new_tokens?: integer
temperature?: float
top_k?: float
top_p?: float
}
export type InferenceAmazonBedrockTaskType = 'completion' | 'text_embedding'
export interface InferenceAnthropicServiceSettings {
api_key: string
model_id: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceAnthropicServiceType = 'anthropic'
export interface InferenceAnthropicTaskSettings {
max_tokens: integer
temperature?: float
top_k?: integer
top_p?: float
}
export type InferenceAnthropicTaskType = 'completion'
export interface InferenceAzureAiStudioServiceSettings {
api_key: string
endpoint_type: string
target: string
provider: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceAzureAiStudioServiceType = 'azureaistudio'
export interface InferenceAzureAiStudioTaskSettings {
do_sample?: float
max_new_tokens?: integer
temperature?: float
top_p?: float
user?: string
}
export type InferenceAzureAiStudioTaskType = 'completion' | 'text_embedding'
export interface InferenceAzureOpenAIServiceSettings {
api_key?: string
api_version: string
deployment_id: string
entra_id?: string
rate_limit?: InferenceRateLimitSetting
resource_name: string
}
export type InferenceAzureOpenAIServiceType = 'azureopenai'
export interface InferenceAzureOpenAITaskSettings {
user?: string
}
export type InferenceAzureOpenAITaskType = 'completion' | 'text_embedding'
export type InferenceCohereEmbeddingType = 'byte' | 'float' | 'int8'
export type InferenceCohereInputType = 'classification' | 'clustering' | 'ingest' | 'search'
export interface InferenceCohereServiceSettings {
api_key: string
embedding_type?: InferenceCohereEmbeddingType
model_id?: string
rate_limit?: InferenceRateLimitSetting
similarity?: InferenceCohereSimilarityType
}
export type InferenceCohereServiceType = 'cohere'
export type InferenceCohereSimilarityType = 'cosine' | 'dot_product' | 'l2_norm'
export interface InferenceCohereTaskSettings {
input_type?: InferenceCohereInputType
return_documents?: boolean
top_n?: integer
truncate?: InferenceCohereTruncateType
}
export type InferenceCohereTaskType = 'completion' | 'rerank' | 'text_embedding'
export type InferenceCohereTruncateType = 'END' | 'NONE' | 'START'
export interface InferenceCompletionInferenceResult {
completion: InferenceCompletionResult[]
}
@ -12848,6 +12978,34 @@ export interface InferenceCompletionResult {
result: string
}
export interface InferenceCompletionTool {
type: string
function: InferenceCompletionToolFunction
}
export interface InferenceCompletionToolChoice {
type: string
function: InferenceCompletionToolChoiceFunction
}
export interface InferenceCompletionToolChoiceFunction {
name: string
}
export interface InferenceCompletionToolFunction {
description?: string
name: string
parameters?: any
strict?: boolean
}
export type InferenceCompletionToolType = string | InferenceCompletionToolChoice
export interface InferenceContentObject {
text: string
type: string
}
export interface InferenceDeleteInferenceEndpointResult extends AcknowledgedResponseBase {
pipelines: string[]
}
@ -12856,7 +13014,79 @@ export type InferenceDenseByteVector = byte[]
export type InferenceDenseVector = float[]
export interface InferenceEisServiceSettings {
model_id: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceEisServiceType = 'elastic'
export type InferenceEisTaskType = 'chat_completion'
export interface InferenceElasticsearchServiceSettings {
adaptive_allocations?: InferenceAdaptiveAllocations
deployment_id?: string
model_id: string
num_allocations?: integer
num_threads: integer
}
export type InferenceElasticsearchServiceType = 'elasticsearch'
export interface InferenceElasticsearchTaskSettings {
return_documents?: boolean
}
export type InferenceElasticsearchTaskType = 'rerank' | 'sparse_embedding' | 'text_embedding'
export interface InferenceElserServiceSettings {
adaptive_allocations?: InferenceAdaptiveAllocations
num_allocations: integer
num_threads: integer
}
export type InferenceElserServiceType = 'elser'
export type InferenceElserTaskType = 'sparse_embedding'
export type InferenceGoogleAiServiceType = 'googleaistudio'
export interface InferenceGoogleAiStudioServiceSettings {
api_key: string
model_id: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding'
export interface InferenceGoogleVertexAIServiceSettings {
location: string
model_id: string
project_id: string
rate_limit?: InferenceRateLimitSetting
service_account_json: string
}
export type InferenceGoogleVertexAIServiceType = 'googlevertexai'
export interface InferenceGoogleVertexAITaskSettings {
auto_truncate?: boolean
top_n?: integer
}
export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding'
export interface InferenceHuggingFaceServiceSettings {
api_key: string
rate_limit?: InferenceRateLimitSetting
url: string
}
export type InferenceHuggingFaceServiceType = 'hugging_face'
export type InferenceHuggingFaceTaskType = 'text_embedding'
export interface InferenceInferenceChunkingSettings {
max_chunk_size?: integer
overlap?: integer
sentence_overlap?: integer
@ -12875,6 +13105,64 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi
task_type: InferenceTaskType
}
export interface InferenceJinaAIServiceSettings {
api_key: string
model_id?: string
rate_limit?: InferenceRateLimitSetting
similarity?: InferenceJinaAISimilarityType
}
export type InferenceJinaAIServiceType = 'jinaai'
export type InferenceJinaAISimilarityType = 'cosine' | 'dot_product' | 'l2_norm'
export interface InferenceJinaAITaskSettings {
return_documents?: boolean
task?: InferenceJinaAITextEmbeddingTask
top_n?: integer
}
export type InferenceJinaAITaskType = 'rerank' | 'text_embedding'
export type InferenceJinaAITextEmbeddingTask = 'classification' | 'clustering' | 'ingest' | 'search'
export interface InferenceMessage {
content?: InferenceMessageContent
role: string
tool_call_id?: Id
tool_calls?: InferenceToolCall[]
}
export type InferenceMessageContent = string | InferenceContentObject[]
export interface InferenceMistralServiceSettings {
api_key: string
max_input_tokens?: integer
model: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceMistralServiceType = 'mistral'
export type InferenceMistralTaskType = 'text_embedding'
export interface InferenceOpenAIServiceSettings {
api_key: string
dimensions?: integer
model_id: string
organization_id?: string
rate_limit?: InferenceRateLimitSetting
url?: string
}
export type InferenceOpenAIServiceType = 'openai'
export interface InferenceOpenAITaskSettings {
user?: string
}
export type InferenceOpenAITaskType = 'chat_completion' | 'completion' | 'text_embedding'
export interface InferenceRankedDocument {
index: integer
relevance_score: float
@ -12885,6 +13173,17 @@ export interface InferenceRateLimitSetting {
requests_per_minute?: integer
}
export interface InferenceRequestChatCompletion {
messages: InferenceMessage[]
model?: string
max_completion_tokens?: long
stop?: string[]
temperature?: float
tool_choice?: InferenceCompletionToolType
tools?: InferenceCompletionTool[]
top_p?: float
}
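// Illustrative shape for an InferenceRequestChatCompletion payload (values are placeholders;
// only `messages` is required):
//
//   const chatRequest: InferenceRequestChatCompletion = {
//     messages: [{ role: 'user', content: 'Summarize the latest indexing errors' }],
//     temperature: 0.2,
//     max_completion_tokens: 256
//   }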
export interface InferenceRerankedInferenceResult {
rerank: InferenceRankedDocument[]
}
@ -12918,69 +13217,56 @@ export interface InferenceTextEmbeddingResult {
embedding: InferenceDenseVector
}
export interface InferenceToolCall {
id: Id
function: InferenceToolCallFunction
type: string
}
export interface InferenceToolCallFunction {
arguments: string
name: string
}
export interface InferenceVoyageAIServiceSettings {
dimensions?: integer
model_id: string
rate_limit?: InferenceRateLimitSetting
embedding_type?: float
}
export type InferenceVoyageAIServiceType = 'voyageai'
export interface InferenceVoyageAITaskSettings {
input_type?: string
return_documents?: boolean
top_k?: integer
truncation?: boolean
}
export type InferenceVoyageAITaskType = 'text_embedding' | 'rerank'
export interface InferenceWatsonxServiceSettings {
api_key: string
api_version: string
model_id: string
project_id: string
rate_limit?: InferenceRateLimitSetting
url: string
}
export type InferenceWatsonxServiceType = 'watsonxai'
export type InferenceWatsonxTaskType = 'text_embedding'
export interface InferenceChatCompletionUnifiedRequest extends RequestBase {
inference_id: Id
timeout?: Duration
chat_completion_request?: InferenceRequestChatCompletion
}
export type InferenceChatCompletionUnifiedResponse = StreamResult
export interface InferenceCompletionRequest extends RequestBase {
inference_id: Id
timeout?: Duration
@ -13008,6 +13294,13 @@ export interface InferenceGetResponse {
endpoints: InferenceInferenceEndpointInfo[]
}
export interface InferencePostEisChatCompletionRequest extends RequestBase {
eis_inference_id: Id
chat_completion_request?: InferenceRequestChatCompletion
}
export type InferencePostEisChatCompletionResponse = StreamResult
export interface InferencePutRequest extends RequestBase {
task_type?: InferenceTaskType
inference_id: Id
@ -13016,103 +13309,185 @@ export interface InferencePutRequest extends RequestBase {
export type InferencePutResponse = InferenceInferenceEndpointInfo
export interface InferencePutAlibabacloudRequest extends RequestBase {
task_type: InferenceAlibabaCloudTaskType
alibabacloud_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAlibabaCloudServiceType
service_settings: InferenceAlibabaCloudServiceSettings
task_settings?: InferenceAlibabaCloudTaskSettings
}
export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo
export interface InferencePutAmazonbedrockRequest extends RequestBase {
task_type: InferenceAmazonBedrockTaskType
amazonbedrock_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAmazonBedrockServiceType
service_settings: InferenceAmazonBedrockServiceSettings
task_settings?: InferenceAmazonBedrockTaskSettings
}
export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfo
export interface InferencePutAnthropicRequest extends RequestBase {
task_type: InferenceAnthropicTaskType
anthropic_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAnthropicServiceType
service_settings: InferenceAnthropicServiceSettings
task_settings?: InferenceAnthropicTaskSettings
}
export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfo
export interface InferencePutAzureaistudioRequest extends RequestBase {
task_type: InferenceAzureAiStudioTaskType
azureaistudio_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAzureAiStudioServiceType
service_settings: InferenceAzureAiStudioServiceSettings
task_settings?: InferenceAzureAiStudioTaskSettings
}
export type InferencePutAzureaistudioResponse = InferenceInferenceEndpointInfo
export interface InferencePutAzureopenaiRequest extends RequestBase {
task_type: InferenceAzureOpenAITaskType
azureopenai_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAzureOpenAIServiceType
service_settings: InferenceAzureOpenAIServiceSettings
task_settings?: InferenceAzureOpenAITaskSettings
}
export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutCohereRequest extends RequestBase {
task_type: InferenceCohereTaskType
cohere_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceCohereServiceType
service_settings: InferenceCohereServiceSettings
task_settings?: InferenceCohereTaskSettings
}
export type InferencePutCohereResponse = InferenceInferenceEndpointInfo
export interface InferencePutEisRequest extends RequestBase {
task_type: InferenceEisTaskType
eis_inference_id: Id
service: InferenceEisServiceType
service_settings: InferenceEisServiceSettings
}
export type InferencePutEisResponse = InferenceInferenceEndpointInfo
export interface InferencePutElasticsearchRequest extends RequestBase {
task_type: InferenceElasticsearchTaskType
elasticsearch_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceElasticsearchServiceType
service_settings: InferenceElasticsearchServiceSettings
task_settings?: InferenceElasticsearchTaskSettings
}
export type InferencePutElasticsearchResponse = InferenceInferenceEndpointInfo
export interface InferencePutElserRequest extends RequestBase {
task_type: InferenceElserTaskType
elser_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceElserServiceType
service_settings: InferenceElserServiceSettings
}
export type InferencePutElserResponse = InferenceInferenceEndpointInfo
export interface InferencePutGoogleaistudioRequest extends RequestBase {
task_type: InferenceGoogleAiStudioTaskType
googleaistudio_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceGoogleAiServiceType
service_settings: InferenceGoogleAiStudioServiceSettings
}
export type InferencePutGoogleaistudioResponse = InferenceInferenceEndpointInfo
export interface InferencePutGooglevertexaiRequest extends RequestBase {
task_type: InferenceGoogleVertexAITaskType
googlevertexai_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceGoogleVertexAIServiceType
service_settings: InferenceGoogleVertexAIServiceSettings
task_settings?: InferenceGoogleVertexAITaskSettings
}
export type InferencePutGooglevertexaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutHuggingFaceRequest extends RequestBase {
task_type: InferenceHuggingFaceTaskType
huggingface_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceHuggingFaceServiceType
service_settings: InferenceHuggingFaceServiceSettings
}
export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfo
export interface InferencePutJinaaiRequest extends RequestBase {
task_type: InferenceJinaAITaskType
jinaai_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceJinaAIServiceType
service_settings: InferenceJinaAIServiceSettings
task_settings?: InferenceJinaAITaskSettings
}
export type InferencePutJinaaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutMistralRequest extends RequestBase {
task_type: InferenceMistralTaskType
mistral_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceMistralServiceType
service_settings: InferenceMistralServiceSettings
}
export type InferencePutMistralResponse = InferenceInferenceEndpointInfo
export interface InferencePutOpenaiRequest extends RequestBase {
task_type: InferenceOpenAITaskType
openai_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceOpenAIServiceType
service_settings: InferenceOpenAIServiceSettings
task_settings?: InferenceOpenAITaskSettings
}
export type InferencePutOpenaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutVoyageaiRequest extends RequestBase {
task_type: InferenceVoyageAITaskType
voyageai_inference_id: Id
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceVoyageAIServiceType
service_settings: InferenceVoyageAIServiceSettings
task_settings?: InferenceVoyageAITaskSettings
}
export type InferencePutVoyageaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutWatsonxRequest extends RequestBase {
task_type: InferenceWatsonxTaskType
watsonx_inference_id: Id
service: InferenceWatsonxServiceType
service_settings: InferenceWatsonxServiceSettings
}
export type InferencePutWatsonxResponse = InferenceInferenceEndpointInfo
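// A minimal sketch of a Watsonx endpoint creation request typed against the
// definitions above. The import path and the concrete credential/model/version
// values are assumptions for illustration, not part of the generated code.
import type { InferencePutWatsonxRequest } from '@elastic/elasticsearch/lib/api/types'

const watsonxRequest: InferencePutWatsonxRequest = {
  task_type: 'text_embedding',
  watsonx_inference_id: 'my-watsonx-embeddings',
  service: 'watsonxai',
  service_settings: {
    api_key: '<watsonx-api-key>',             // placeholder credential
    api_version: '2024-05-31',                // assumed API version string
    model_id: 'ibm/slate-30m-english-rtrvr',  // illustrative model id
    project_id: '<watsonx-project-id>',
    url: 'https://us-south.ml.cloud.ibm.com'
  }
}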
export interface InferenceRerankRequest extends RequestBase {
inference_id: Id
timeout?: Duration
@ -14654,6 +15029,8 @@ export interface MlExponentialAverageCalculationContext {
previous_exponential_average_ms?: DurationValue<UnitFloatMillis>
}
export type MlFeatureExtractor = MlQueryFeatureExtractor
export interface MlFillMaskInferenceOptions {
mask_token?: string
num_top_classes?: integer
@ -14719,6 +15096,7 @@ export interface MlInferenceConfigCreateContainer {
text_classification?: MlTextClassificationInferenceOptions
zero_shot_classification?: MlZeroShotClassificationInferenceOptions
fill_mask?: MlFillMaskInferenceOptions
learning_to_rank?: MlLearningToRankConfig
ner?: MlNerInferenceOptions
pass_through?: MlPassThroughInferenceOptions
text_embedding?: MlTextEmbeddingInferenceOptions
@ -14865,6 +15243,12 @@ export interface MlJobTimingStats {
minimum_bucket_processing_time_ms?: DurationValue<UnitFloatMillis>
}
export interface MlLearningToRankConfig {
default_params?: Record<string, any>
feature_extractors?: Record<string, MlFeatureExtractor>[]
num_top_feature_importance_values: integer
}
export type MlMemoryStatus = 'ok' | 'soft_limit' | 'hard_limit'
export interface MlModelPackageConfig {
@ -15007,6 +15391,12 @@ export interface MlPerPartitionCategorization {
export type MlPredictedValue = ScalarValue | ScalarValue[]
export interface MlQueryFeatureExtractor {
default_score?: float
feature_name: string
query: QueryDslQueryContainer
}
export interface MlQuestionAnsweringInferenceOptions {
num_top_classes?: integer
tokenization?: MlTokenizationConfigContainer
@ -15051,6 +15441,7 @@ export interface MlTextClassificationInferenceOptions {
tokenization?: MlTokenizationConfigContainer
results_field?: string
classification_labels?: string[]
vocabulary?: MlVocabulary
}
export interface MlTextClassificationInferenceUpdateOptions {
@ -15093,6 +15484,7 @@ export interface MlTokenizationConfigContainer {
bert_ja?: MlNlpBertTokenizationConfig
mpnet?: MlNlpBertTokenizationConfig
roberta?: MlNlpRobertaTokenizationConfig
xlm_roberta?: MlXlmRobertaTokenizationConfig
}
export type MlTokenizationTruncate = 'first' | 'second' | 'none'
@ -15130,6 +15522,11 @@ export interface MlTrainedModelAssignment {
task_parameters: MlTrainedModelAssignmentTaskParameters
}
export interface MlTrainedModelAssignmentRoutingStateAndReason {
reason?: string
routing_state: MlRoutingState
}
export interface MlTrainedModelAssignmentRoutingTable {
reason?: string
routing_state: MlRoutingState
@ -15170,6 +15567,7 @@ export interface MlTrainedModelConfig {
model_size_bytes?: ByteSize
model_package?: MlModelPackageConfig
location?: MlTrainedModelLocation
platform_architecture?: string
prefix_strings?: MlTrainedModelPrefixStrings
}
@ -15203,8 +15601,8 @@ export interface MlTrainedModelDeploymentNodesStats {
number_of_allocations?: integer
number_of_pending_requests?: integer
peak_throughput_per_minute: long
rejected_execution_count?: integer
routing_state: MlTrainedModelAssignmentRoutingStateAndReason
start_time?: EpochTime<UnitMillis>
threads_per_allocation?: integer
throughput_last_minute: integer
@ -15305,6 +15703,9 @@ export interface MlVocabulary {
index: IndexName
}
export interface MlXlmRobertaTokenizationConfig extends MlCommonTokenizationConfig {
}
export interface MlZeroShotClassificationInferenceOptions {
tokenization?: MlTokenizationConfigContainer
hypothesis_template?: string
@ -20426,6 +20827,7 @@ export interface TransformGetTransformResponse {
export interface TransformGetTransformTransformSummary {
authorization?: MlTransformAuthorization
create_time?: EpochTime<UnitMillis>
create_time_string?: DateTime
description?: string
dest: ReindexDestination
frequency?: Duration
@ -20451,11 +20853,12 @@ export interface TransformGetTransformStatsCheckpointStats {
export interface TransformGetTransformStatsCheckpointing {
changes_last_detected_at?: long
changes_last_detected_at_date_time?: DateTime
changes_last_detected_at_string?: DateTime
last: TransformGetTransformStatsCheckpointStats
next?: TransformGetTransformStatsCheckpointStats
operations_behind?: long
last_search_time?: long
last_search_time_string?: DateTime
}
export interface TransformGetTransformStatsRequest extends RequestBase {
@ -20471,6 +20874,15 @@ export interface TransformGetTransformStatsResponse {
transforms: TransformGetTransformStatsTransformStats[]
}
export interface TransformGetTransformStatsTransformHealthIssue {
type: string
issue: string
details?: string
count: integer
first_occurrence?: EpochTime<UnitMillis>
first_occurence_string?: DateTime
}
export interface TransformGetTransformStatsTransformIndexerStats {
delete_time_in_ms?: EpochTime<UnitMillis>
documents_indexed: long
@ -20511,6 +20923,7 @@ export interface TransformGetTransformStatsTransformStats {
export interface TransformGetTransformStatsTransformStatsHealth {
status: HealthStatus
issues?: TransformGetTransformStatsTransformHealthIssue[]
}
export interface TransformPreviewTransformRequest extends RequestBase {

View File

@ -788,7 +788,7 @@ export interface MsearchMultisearchBody {
knn?: KnnSearch | KnnSearch[]
from?: integer
highlight?: SearchHighlight
indices_boost?: Partial<Record<IndexName, double>>[]
min_score?: double
post_filter?: QueryDslQueryContainer
profile?: boolean
@ -931,6 +931,7 @@ export interface OpenPointInTimeRequest extends RequestBase {
routing?: Routing
expand_wildcards?: ExpandWildcards
allow_partial_search_results?: boolean
max_concurrent_shard_requests?: integer
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
index_filter?: QueryDslQueryContainer
@ -1256,7 +1257,7 @@ export interface SearchRequest extends RequestBase {
from?: integer
highlight?: SearchHighlight
track_total_hits?: SearchTrackHits
indices_boost?: Partial<Record<IndexName, double>>[]
docvalue_fields?: (QueryDslFieldAndFormat | Field)[]
knn?: KnnSearch | KnnSearch[]
rank?: RankContainer
@ -5382,6 +5383,10 @@ export interface MappingBooleanProperty extends MappingDocValuesPropertyBase {
fielddata?: IndicesNumericFielddata
index?: boolean
null_value?: boolean
ignore_malformed?: boolean
script?: Script | string
on_script_error?: MappingOnScriptError
time_series_dimension?: boolean
type: 'boolean'
}
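// A typed value matching the expanded boolean mapping above; shows the newly
// listed ignore_malformed and null_value options. Import path is assumed.
import type { MappingBooleanProperty } from '@elastic/elasticsearch/lib/api/types'

const activeField: MappingBooleanProperty = {
  type: 'boolean',
  ignore_malformed: true,  // tolerate non-boolean values instead of rejecting the document
  null_value: false        // index explicit nulls as false
}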
@ -5463,7 +5468,7 @@ export interface MappingDenseVectorIndexOptions {
type: MappingDenseVectorIndexOptionsType
}
export type MappingDenseVectorIndexOptionsType = 'bbq_flat' | 'bbq_hnsw' | 'flat' | 'hnsw' | 'int4_flat' | 'int4_hnsw' | 'int8_flat' | 'int8_hnsw'
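// Hedged sketch: creating an index whose dense_vector field opts into the newly
// added 'bbq_hnsw' index option. Index name, dims and similarity are illustrative.
import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: 'http://localhost:9200' })

await client.indices.create({
  index: 'my-vectors',
  mappings: {
    properties: {
      embedding: {
        type: 'dense_vector',
        dims: 384,
        index: true,
        similarity: 'cosine',
        index_options: { type: 'bbq_hnsw' }  // better-binary-quantized HNSW
      }
    }
  }
})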
export interface MappingDenseVectorProperty extends MappingPropertyBase {
type: 'dense_vector'
@ -6840,7 +6845,7 @@ export interface AsyncSearchSubmitRequest extends RequestBase {
from?: integer
highlight?: SearchHighlight
track_total_hits?: SearchTrackHits
indices_boost?: Partial<Record<IndexName, double>>[]
docvalue_fields?: (QueryDslFieldAndFormat | Field)[]
knn?: KnnSearch | KnnSearch[]
min_score?: double
@ -10417,10 +10422,11 @@ export interface EnrichDeletePolicyRequest extends RequestBase {
export type EnrichDeletePolicyResponse = AcknowledgedResponseBase
export type EnrichExecutePolicyEnrichPolicyPhase = 'SCHEDULED' | 'RUNNING' | 'COMPLETE' | 'FAILED' | 'CANCELLED'
export interface EnrichExecutePolicyExecuteEnrichPolicyStatus {
phase: EnrichExecutePolicyEnrichPolicyPhase
step?: string
}
export interface EnrichExecutePolicyRequest extends RequestBase {
@ -10431,7 +10437,7 @@ export interface EnrichExecutePolicyRequest extends RequestBase {
export interface EnrichExecutePolicyResponse {
status?: EnrichExecutePolicyExecuteEnrichPolicyStatus
task?: TaskId
}
export interface EnrichGetPolicyRequest extends RequestBase {
@ -10759,7 +10765,7 @@ export interface FleetSearchRequest extends RequestBase {
from?: integer
highlight?: SearchHighlight
track_total_hits?: SearchTrackHits
indices_boost?: Partial<Record<IndexName, double>>[]
docvalue_fields?: (QueryDslFieldAndFormat | Field)[]
min_score?: double
post_filter?: QueryDslQueryContainer
@ -13082,6 +13088,130 @@ export interface IndicesValidateQueryResponse {
error?: string
}
export interface InferenceAdaptiveAllocations {
enabled?: boolean
max_number_of_allocations?: integer
min_number_of_allocations?: integer
}
export interface InferenceAlibabaCloudServiceSettings {
api_key: string
host: string
rate_limit?: InferenceRateLimitSetting
service_id: string
workspace: string
}
export type InferenceAlibabaCloudServiceType = 'alibabacloud-ai-search'
export interface InferenceAlibabaCloudTaskSettings {
input_type?: string
return_token?: boolean
}
export type InferenceAlibabaCloudTaskType = 'completion' | 'rerank' | 'space_embedding' | 'text_embedding'
export interface InferenceAmazonBedrockServiceSettings {
access_key: string
model: string
provider?: string
region: string
rate_limit?: InferenceRateLimitSetting
secret_key: string
}
export type InferenceAmazonBedrockServiceType = 'amazonbedrock'
export interface InferenceAmazonBedrockTaskSettings {
max_new_tokens?: integer
temperature?: float
top_k?: float
top_p?: float
}
export type InferenceAmazonBedrockTaskType = 'completion' | 'text_embedding'
export interface InferenceAnthropicServiceSettings {
api_key: string
model_id: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceAnthropicServiceType = 'anthropic'
export interface InferenceAnthropicTaskSettings {
max_tokens: integer
temperature?: float
top_k?: integer
top_p?: float
}
export type InferenceAnthropicTaskType = 'completion'
export interface InferenceAzureAiStudioServiceSettings {
api_key: string
endpoint_type: string
target: string
provider: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceAzureAiStudioServiceType = 'azureaistudio'
export interface InferenceAzureAiStudioTaskSettings {
do_sample?: float
max_new_tokens?: integer
temperature?: float
top_p?: float
user?: string
}
export type InferenceAzureAiStudioTaskType = 'completion' | 'text_embedding'
export interface InferenceAzureOpenAIServiceSettings {
api_key?: string
api_version: string
deployment_id: string
entra_id?: string
rate_limit?: InferenceRateLimitSetting
resource_name: string
}
export type InferenceAzureOpenAIServiceType = 'azureopenai'
export interface InferenceAzureOpenAITaskSettings {
user?: string
}
export type InferenceAzureOpenAITaskType = 'completion' | 'text_embedding'
export type InferenceCohereEmbeddingType = 'byte' | 'float' | 'int8'
export type InferenceCohereInputType = 'classification' | 'clustering' | 'ingest' | 'search'
export interface InferenceCohereServiceSettings {
api_key: string
embedding_type?: InferenceCohereEmbeddingType
model_id?: string
rate_limit?: InferenceRateLimitSetting
similarity?: InferenceCohereSimilarityType
}
export type InferenceCohereServiceType = 'cohere'
export type InferenceCohereSimilarityType = 'cosine' | 'dot_product' | 'l2_norm'
export interface InferenceCohereTaskSettings {
input_type?: InferenceCohereInputType
return_documents?: boolean
top_n?: integer
truncate?: InferenceCohereTruncateType
}
export type InferenceCohereTaskType = 'completion' | 'rerank' | 'text_embedding'
export type InferenceCohereTruncateType = 'END' | 'NONE' | 'START'
export interface InferenceCompletionInferenceResult {
completion: InferenceCompletionResult[]
}
@ -13090,6 +13220,34 @@ export interface InferenceCompletionResult {
result: string
}
export interface InferenceCompletionTool {
type: string
function: InferenceCompletionToolFunction
}
export interface InferenceCompletionToolChoice {
type: string
function: InferenceCompletionToolChoiceFunction
}
export interface InferenceCompletionToolChoiceFunction {
name: string
}
export interface InferenceCompletionToolFunction {
description?: string
name: string
parameters?: any
strict?: boolean
}
export type InferenceCompletionToolType = string | InferenceCompletionToolChoice
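// A small sketch of values matching the tool-calling shapes above; the function
// name and JSON schema are illustrative only, and the import path is assumed.
import type { InferenceCompletionTool, InferenceCompletionToolType } from '@elastic/elasticsearch/lib/api/types'

const weatherTool: InferenceCompletionTool = {
  type: 'function',
  function: {
    name: 'get_current_weather',
    description: 'Return the current weather for a location',
    parameters: {
      type: 'object',
      properties: { location: { type: 'string' } },
      required: ['location']
    }
  }
}

// The tool choice accepts either a plain string or a structured choice object.
const toolChoice: InferenceCompletionToolType = {
  type: 'function',
  function: { name: 'get_current_weather' }
}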
export interface InferenceContentObject {
text: string
type: string
}
export interface InferenceDeleteInferenceEndpointResult extends AcknowledgedResponseBase {
pipelines: string[]
}
@ -13098,7 +13256,79 @@ export type InferenceDenseByteVector = byte[]
export type InferenceDenseVector = float[]
export interface InferenceEisServiceSettings {
model_id: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceEisServiceType = 'elastic'
export type InferenceEisTaskType = 'chat_completion'
export interface InferenceElasticsearchServiceSettings {
adaptive_allocations?: InferenceAdaptiveAllocations
deployment_id?: string
model_id: string
num_allocations?: integer
num_threads: integer
}
export type InferenceElasticsearchServiceType = 'elasticsearch'
export interface InferenceElasticsearchTaskSettings {
return_documents?: boolean
}
export type InferenceElasticsearchTaskType = 'rerank' | 'sparse_embedding' | 'text_embedding'
export interface InferenceElserServiceSettings {
adaptive_allocations?: InferenceAdaptiveAllocations
num_allocations: integer
num_threads: integer
}
export type InferenceElserServiceType = 'elser'
export type InferenceElserTaskType = 'sparse_embedding'
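// Hedged sketch: creating an ELSER sparse-embedding endpoint with adaptive
// allocations. The helper name (inference.putElser) and the endpoint id are
// assumptions inferred from the request shape defined later in this file.
import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: 'http://localhost:9200' })

await client.inference.putElser({
  task_type: 'sparse_embedding',
  elser_inference_id: 'my-elser-endpoint',
  service: 'elser',
  service_settings: {
    num_allocations: 1,
    num_threads: 1,
    adaptive_allocations: {
      enabled: true,
      min_number_of_allocations: 1,
      max_number_of_allocations: 4
    }
  }
})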
export type InferenceGoogleAiServiceType = 'googleaistudio'
export interface InferenceGoogleAiStudioServiceSettings {
api_key: string
model_id: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding'
export interface InferenceGoogleVertexAIServiceSettings {
location: string
model_id: string
project_id: string
rate_limit?: InferenceRateLimitSetting
service_account_json: string
}
export type InferenceGoogleVertexAIServiceType = 'googlevertexai'
export interface InferenceGoogleVertexAITaskSettings {
auto_truncate?: boolean
top_n?: integer
}
export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding'
export interface InferenceHuggingFaceServiceSettings {
api_key: string
rate_limit?: InferenceRateLimitSetting
url: string
}
export type InferenceHuggingFaceServiceType = 'hugging_face'
export type InferenceHuggingFaceTaskType = 'text_embedding'
export interface InferenceInferenceChunkingSettings {
max_chunk_size?: integer
overlap?: integer
sentence_overlap?: integer
@ -13117,6 +13347,64 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi
task_type: InferenceTaskType
}
export interface InferenceJinaAIServiceSettings {
api_key: string
model_id?: string
rate_limit?: InferenceRateLimitSetting
similarity?: InferenceJinaAISimilarityType
}
export type InferenceJinaAIServiceType = 'jinaai'
export type InferenceJinaAISimilarityType = 'cosine' | 'dot_product' | 'l2_norm'
export interface InferenceJinaAITaskSettings {
return_documents?: boolean
task?: InferenceJinaAITextEmbeddingTask
top_n?: integer
}
export type InferenceJinaAITaskType = 'rerank' | 'text_embedding'
export type InferenceJinaAITextEmbeddingTask = 'classification' | 'clustering' | 'ingest' | 'search'
export interface InferenceMessage {
content?: InferenceMessageContent
role: string
tool_call_id?: Id
tool_calls?: InferenceToolCall[]
}
export type InferenceMessageContent = string | InferenceContentObject[]
export interface InferenceMistralServiceSettings {
api_key: string
max_input_tokens?: integer
model: string
rate_limit?: InferenceRateLimitSetting
}
export type InferenceMistralServiceType = 'mistral'
export type InferenceMistralTaskType = 'text_embedding'
export interface InferenceOpenAIServiceSettings {
api_key: string
dimensions?: integer
model_id: string
organization_id?: string
rate_limit?: InferenceRateLimitSetting
url?: string
}
export type InferenceOpenAIServiceType = 'openai'
export interface InferenceOpenAITaskSettings {
user?: string
}
export type InferenceOpenAITaskType = 'chat_completion' | 'completion' | 'text_embedding'
export interface InferenceRankedDocument {
index: integer
relevance_score: float
@ -13127,6 +13415,17 @@ export interface InferenceRateLimitSetting {
requests_per_minute?: integer
}
export interface InferenceRequestChatCompletion {
messages: InferenceMessage[]
model?: string
max_completion_tokens?: long
stop?: string[]
temperature?: float
tool_choice?: InferenceCompletionToolType
tools?: InferenceCompletionTool[]
top_p?: float
}
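// A typed chat-completion payload matching InferenceRequestChatCompletion; the
// message content and sampling values are illustrative, and the import path is assumed.
import type { InferenceRequestChatCompletion } from '@elastic/elasticsearch/lib/api/types'

const chatRequest: InferenceRequestChatCompletion = {
  messages: [
    { role: 'system', content: 'You are a concise assistant.' },
    { role: 'user', content: [{ type: 'text', text: 'Summarize the release notes in one sentence.' }] }
  ],
  temperature: 0.2,
  top_p: 0.9,
  max_completion_tokens: 256,
  stop: ['\n\n']
}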
export interface InferenceRerankedInferenceResult {
rerank: InferenceRankedDocument[]
}
@ -13160,72 +13459,57 @@ export interface InferenceTextEmbeddingResult {
embedding: InferenceDenseVector
}
export interface InferenceToolCall {
id: Id
function: InferenceToolCallFunction
type: string
}
export interface InferenceToolCallFunction {
arguments: string
name: string
}
export interface InferenceVoyageAIServiceSettings {
dimensions?: integer
model_id: string
rate_limit?: InferenceRateLimitSetting
embedding_type?: float
}
export type InferenceVoyageAIServiceType = 'voyageai'
export interface InferenceVoyageAITaskSettings {
input_type?: string
return_documents?: boolean
top_k?: integer
truncation?: boolean
}
export type InferenceVoyageAITaskType = 'text_embedding' | 'rerank'
export interface InferenceWatsonxServiceSettings {
api_key: string
api_version: string
model_id: string
project_id: string
rate_limit?: InferenceRateLimitSetting
url: string
}
export type InferenceWatsonxServiceType = 'watsonxai'
export type InferenceWatsonxTaskType = 'text_embedding'
export interface InferenceChatCompletionUnifiedRequest extends RequestBase {
inference_id: Id
timeout?: Duration
/** @deprecated The use of the 'body' key has been deprecated, use 'chat_completion_request' instead. */
body?: InferenceRequestChatCompletion
}
export type InferenceChatCompletionUnifiedResponse = StreamResult
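// Hedged usage sketch of the unified chat completion API. The helper name
// (inference.chatCompletionUnified) and the 'chat_completion_request' key are
// assumptions taken from the request shape and its deprecation note; the
// response is a stream result.
import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: 'http://localhost:9200' })

const stream = await client.inference.chatCompletionUnified({
  inference_id: 'my-chat-endpoint',
  chat_completion_request: {
    messages: [{ role: 'user', content: 'Hello!' }]
  }
})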
export interface InferenceCompletionRequest extends RequestBase {
inference_id: Id
timeout?: Duration
@ -13256,6 +13540,14 @@ export interface InferenceGetResponse {
endpoints: InferenceInferenceEndpointInfo[]
}
export interface InferencePostEisChatCompletionRequest extends RequestBase {
eis_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, use 'chat_completion_request' instead. */
body?: InferenceRequestChatCompletion
}
export type InferencePostEisChatCompletionResponse = StreamResult
export interface InferencePutRequest extends RequestBase {
task_type?: InferenceTaskType
inference_id: Id
@ -13265,115 +13557,236 @@ export interface InferencePutRequest extends RequestBase {
export type InferencePutResponse = InferenceInferenceEndpointInfo
export interface InferencePutAlibabacloudRequest extends RequestBase {
task_type: InferenceAlibabaCloudTaskType
alibabacloud_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAlibabaCloudServiceType
service_settings: InferenceAlibabaCloudServiceSettings
task_settings?: InferenceAlibabaCloudTaskSettings
}
}
export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo
export interface InferencePutAmazonbedrockRequest extends RequestBase {
task_type: InferenceAmazonBedrockTaskType
amazonbedrock_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAmazonBedrockServiceType
service_settings: InferenceAmazonBedrockServiceSettings
task_settings?: InferenceAmazonBedrockTaskSettings
}
}
export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfo
export interface InferencePutAnthropicRequest extends RequestBase {
task_type: InferenceAnthropicTaskType
anthropic_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAnthropicServiceType
service_settings: InferenceAnthropicServiceSettings
task_settings?: InferenceAnthropicTaskSettings
}
}
export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfo
export interface InferencePutAzureaistudioRequest extends RequestBase {
task_type: InferenceAzureAiStudioTaskType
azureaistudio_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAzureAiStudioServiceType
service_settings: InferenceAzureAiStudioServiceSettings
task_settings?: InferenceAzureAiStudioTaskSettings
}
}
export type InferencePutAzureaistudioResponse = InferenceInferenceEndpointInfo
export interface InferencePutAzureopenaiRequest extends RequestBase {
task_type: InferenceAzureOpenAITaskType
azureopenai_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceAzureOpenAIServiceType
service_settings: InferenceAzureOpenAIServiceSettings
task_settings?: InferenceAzureOpenAITaskSettings
}
}
export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutCohereRequest extends RequestBase {
task_type: InferenceCohereTaskType
cohere_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceCohereServiceType
service_settings: InferenceCohereServiceSettings
task_settings?: InferenceCohereTaskSettings
}
}
export type InferencePutCohereResponse = InferenceInferenceEndpointInfo
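// Hedged sketch: registering a Cohere rerank endpoint. The helper name
// (inference.putCohere), model id and task settings are illustrative assumptions.
import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: 'http://localhost:9200' })

await client.inference.putCohere({
  task_type: 'rerank',
  cohere_inference_id: 'my-cohere-rerank',
  service: 'cohere',
  service_settings: {
    api_key: '<cohere-api-key>',
    model_id: 'rerank-english-v3.0'
  },
  task_settings: {
    top_n: 10,
    return_documents: true
  }
})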
export interface InferencePutEisRequest extends RequestBase {
task_type: InferenceEisTaskType
eis_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
service: InferenceEisServiceType
service_settings: InferenceEisServiceSettings
}
}
export type InferencePutEisResponse = InferenceInferenceEndpointInfo
export interface InferencePutElasticsearchRequest extends RequestBase {
task_type: InferenceElasticsearchTaskType
elasticsearch_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceElasticsearchServiceType
service_settings: InferenceElasticsearchServiceSettings
task_settings?: InferenceElasticsearchTaskSettings
}
}
export type InferencePutElasticsearchResponse = InferenceInferenceEndpointInfo
export interface InferencePutElserRequest extends RequestBase {
task_type: InferenceElserTaskType
elser_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceElserServiceType
service_settings: InferenceElserServiceSettings
}
}
export type InferencePutElserResponse = InferenceInferenceEndpointInfo
export interface InferencePutGoogleaistudioRequest extends RequestBase {
task_type: InferenceGoogleAiStudioTaskType
googleaistudio_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceGoogleAiServiceType
service_settings: InferenceGoogleAiStudioServiceSettings
}
}
export type InferencePutGoogleaistudioResponse = InferenceInferenceEndpointInfo
export interface InferencePutGooglevertexaiRequest extends RequestBase {
task_type: InferenceGoogleVertexAITaskType
googlevertexai_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceGoogleVertexAIServiceType
service_settings: InferenceGoogleVertexAIServiceSettings
task_settings?: InferenceGoogleVertexAITaskSettings
}
}
export type InferencePutGooglevertexaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutHuggingFaceRequest extends RequestBase {
task_type: InferenceHuggingFaceTaskType
huggingface_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceHuggingFaceServiceType
service_settings: InferenceHuggingFaceServiceSettings
}
}
export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfo
export interface InferencePutJinaaiRequest extends RequestBase {
task_type: InferenceJinaAITaskType
jinaai_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceJinaAIServiceType
service_settings: InferenceJinaAIServiceSettings
task_settings?: InferenceJinaAITaskSettings
}
}
export type InferencePutJinaaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutMistralRequest extends RequestBase {
task_type: InferenceMistralTaskType
mistral_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceMistralServiceType
service_settings: InferenceMistralServiceSettings
}
}
export type InferencePutMistralResponse = InferenceInferenceEndpointInfo
export interface InferencePutOpenaiRequest extends RequestBase {
task_type: InferenceOpenAITaskType
openai_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceOpenAIServiceType
service_settings: InferenceOpenAIServiceSettings
task_settings?: InferenceOpenAITaskSettings
}
}
export type InferencePutOpenaiResponse = InferenceInferenceEndpointInfo
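// Hedged sketch: creating an OpenAI text-embedding endpoint using the renamed
// shared types. The helper name (inference.putOpenai) and the model id are
// illustrative assumptions.
import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: 'http://localhost:9200' })

await client.inference.putOpenai({
  task_type: 'text_embedding',
  openai_inference_id: 'my-openai-embeddings',
  service: 'openai',
  service_settings: {
    api_key: '<openai-api-key>',
    model_id: 'text-embedding-3-small',
    dimensions: 512
  },
  task_settings: { user: 'docs-example' }
})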
export interface InferencePutVoyageaiRequest extends RequestBase {
task_type: InferenceVoyageAITaskType
voyageai_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
chunking_settings?: InferenceInferenceChunkingSettings
service: InferenceVoyageAIServiceType
service_settings: InferenceVoyageAIServiceSettings
task_settings?: InferenceVoyageAITaskSettings
}
}
export type InferencePutVoyageaiResponse = InferenceInferenceEndpointInfo
export interface InferencePutWatsonxRequest extends RequestBase {
task_type: InferenceWatsonxTaskType
watsonx_inference_id: Id
/** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */
body?: {
service: InferenceWatsonxServiceType
service_settings: InferenceWatsonxServiceSettings
}
}
export type InferencePutWatsonxResponse = InferenceInferenceEndpointInfo
export interface InferenceRerankRequest extends RequestBase {
inference_id: Id
timeout?: Duration
@ -14942,6 +15355,8 @@ export interface MlExponentialAverageCalculationContext {
previous_exponential_average_ms?: DurationValue<UnitFloatMillis>
}
export type MlFeatureExtractor = MlQueryFeatureExtractor
export interface MlFillMaskInferenceOptions {
mask_token?: string
num_top_classes?: integer
@ -15007,6 +15422,7 @@ export interface MlInferenceConfigCreateContainer {
text_classification?: MlTextClassificationInferenceOptions
zero_shot_classification?: MlZeroShotClassificationInferenceOptions
fill_mask?: MlFillMaskInferenceOptions
learning_to_rank?: MlLearningToRankConfig
ner?: MlNerInferenceOptions
pass_through?: MlPassThroughInferenceOptions
text_embedding?: MlTextEmbeddingInferenceOptions
@ -15153,6 +15569,12 @@ export interface MlJobTimingStats {
minimum_bucket_processing_time_ms?: DurationValue<UnitFloatMillis>
}
export interface MlLearningToRankConfig {
default_params?: Record<string, any>
feature_extractors?: Record<string, MlFeatureExtractor>[]
num_top_feature_importance_values: integer
}
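// A typed example of the new learning-to-rank configuration; the extractor key
// ('query_extractor'), feature name and template parameter are illustrative,
// and the import path is assumed.
import type { MlLearningToRankConfig } from '@elastic/elasticsearch/lib/api/types'

const ltrConfig: MlLearningToRankConfig = {
  num_top_feature_importance_values: 0,
  default_params: { query_text: '' },
  feature_extractors: [
    {
      query_extractor: {
        feature_name: 'title_bm25',
        query: { match: { title: '{{query_text}}' } }
      }
    }
  ]
}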
export type MlMemoryStatus = 'ok' | 'soft_limit' | 'hard_limit'
export interface MlModelPackageConfig {
@ -15295,6 +15717,12 @@ export interface MlPerPartitionCategorization {
export type MlPredictedValue = ScalarValue | ScalarValue[]
export interface MlQueryFeatureExtractor {
default_score?: float
feature_name: string
query: QueryDslQueryContainer
}
export interface MlQuestionAnsweringInferenceOptions {
num_top_classes?: integer
tokenization?: MlTokenizationConfigContainer
@ -15339,6 +15767,7 @@ export interface MlTextClassificationInferenceOptions {
tokenization?: MlTokenizationConfigContainer
results_field?: string
classification_labels?: string[]
vocabulary?: MlVocabulary
}
export interface MlTextClassificationInferenceUpdateOptions {
@ -15381,6 +15810,7 @@ export interface MlTokenizationConfigContainer {
bert_ja?: MlNlpBertTokenizationConfig
mpnet?: MlNlpBertTokenizationConfig
roberta?: MlNlpRobertaTokenizationConfig
xlm_roberta?: MlXlmRobertaTokenizationConfig
}
export type MlTokenizationTruncate = 'first' | 'second' | 'none'
@ -15418,6 +15848,11 @@ export interface MlTrainedModelAssignment {
task_parameters: MlTrainedModelAssignmentTaskParameters
}
export interface MlTrainedModelAssignmentRoutingStateAndReason {
reason?: string
routing_state: MlRoutingState
}
export interface MlTrainedModelAssignmentRoutingTable {
reason?: string
routing_state: MlRoutingState
@ -15458,6 +15893,7 @@ export interface MlTrainedModelConfig {
model_size_bytes?: ByteSize
model_package?: MlModelPackageConfig
location?: MlTrainedModelLocation
platform_architecture?: string
prefix_strings?: MlTrainedModelPrefixStrings
}
@ -15491,8 +15927,8 @@ export interface MlTrainedModelDeploymentNodesStats {
number_of_allocations?: integer
number_of_pending_requests?: integer
peak_throughput_per_minute: long
rejected_execution_count?: integer
routing_state: MlTrainedModelAssignmentRoutingStateAndReason
start_time?: EpochTime<UnitMillis>
threads_per_allocation?: integer
throughput_last_minute: integer
@ -15593,6 +16029,9 @@ export interface MlVocabulary {
index: IndexName
}
export interface MlXlmRobertaTokenizationConfig extends MlCommonTokenizationConfig {
}
export interface MlZeroShotClassificationInferenceOptions {
tokenization?: MlTokenizationConfigContainer
hypothesis_template?: string
@ -21003,6 +21442,7 @@ export interface TransformGetTransformResponse {
export interface TransformGetTransformTransformSummary {
authorization?: MlTransformAuthorization
create_time?: EpochTime<UnitMillis>
create_time_string?: DateTime
description?: string
dest: ReindexDestination
frequency?: Duration
@ -21028,11 +21468,12 @@ export interface TransformGetTransformStatsCheckpointStats {
export interface TransformGetTransformStatsCheckpointing {
changes_last_detected_at?: long
changes_last_detected_at_date_time?: DateTime
changes_last_detected_at_string?: DateTime
last: TransformGetTransformStatsCheckpointStats
next?: TransformGetTransformStatsCheckpointStats
operations_behind?: long
last_search_time?: long
last_search_time_string?: DateTime
}
export interface TransformGetTransformStatsRequest extends RequestBase {
@ -21048,6 +21489,15 @@ export interface TransformGetTransformStatsResponse {
transforms: TransformGetTransformStatsTransformStats[]
}
export interface TransformGetTransformStatsTransformHealthIssue {
type: string
issue: string
details?: string
count: integer
first_occurrence?: EpochTime<UnitMillis>
first_occurence_string?: DateTime
}
export interface TransformGetTransformStatsTransformIndexerStats {
delete_time_in_ms?: EpochTime<UnitMillis>
documents_indexed: long
@ -21088,6 +21538,7 @@ export interface TransformGetTransformStatsTransformStats {
export interface TransformGetTransformStatsTransformStatsHealth {
status: HealthStatus
issues?: TransformGetTransformStatsTransformHealthIssue[]
}
export interface TransformPreviewTransformRequest extends RequestBase {