Configuration
- name: <string> # name of the API (required)
kind: BatchAPI # must be "BatchAPI" for batch APIs (required)
pod: # pod configuration (required)
port: <int> # port to which requests will be sent (default: 8080; exported as $CORTEX_PORT)
containers: # configurations for the containers to run (at least one container must be provided)
- name: <string> # name of the container (required)
image: <string> # docker image to use for the container (required)
command: <list[string]> # entrypoint (not executed within a shell); env vars can be used with e.g. $(CORTEX_PORT) (required)
args: <list[string]> # arguments to the entrypoint; env vars can be used with e.g. $(CORTEX_PORT) (default: no args)
env: <map[string:string]> # dictionary of environment variables to set in the container (optional)
compute: # compute resource requests (default: see below)
cpu: <string|int|float> # CPU request for the container; one unit of CPU corresponds to one virtual CPU; fractional requests are allowed, and can be specified as a floating point number or via the "m" suffix (default: 200m)
gpu: <int> # GPU request for the container; one unit of GPU corresponds to one virtual GPU (default: 0)
inf: <int> # Inferentia request for the container; one unit of inf corresponds to one virtual Inferentia chip (default: 0)
mem: <string> # memory request for the container; one unit of memory is one byte and can be expressed as an integer or by using one of these suffixes: K, M, G, T (or their power-of-two counterparts: Ki, Mi, Gi, Ti) (default: Null)
shm: <string> # size of shared memory (/dev/shm) for sharing data between multiple processes, e.g. 64Mi or 1Gi (default: Null)
readiness_probe: # periodic probe of container readiness; traffic will not be sent into the pod unless all containers' readiness probes are succeeding (optional)
http_get: # specifies an http endpoint which must respond with status code 200 (only one of http_get, tcp_socket, and exec may be specified)
port: <int|string> # the port to access on the container (required)
path: <string> # the path to access on the HTTP server (default: /)
tcp_socket: # specifies a port which must be ready to receive traffic (only one of http_get, tcp_socket, and exec may be specified)
port: <int|string> # the port to access on the container (required)
initial_delay_seconds: <int> # number of seconds after the container has started before the probe is initiated (default: 0)
timeout_seconds: <int> # number of seconds until the probe times out (default: 1)
period_seconds: <int> # how often (in seconds) to perform the probe (default: 10)
success_threshold: <int> # minimum consecutive successes for the probe to be considered successful after having failed (default: 1)
failure_threshold: <int> # minimum consecutive failures for the probe to be considered failed after having succeeded (default: 3)
liveness_probe: # periodic probe of container liveness; container will be restarted if the probe fails (optional)
http_get: # specifies an http endpoint which must respond with status code 200 (only one of http_get, tcp_socket, and exec may be specified)
port: <int|string> # the port to access on the container (required)
path: <string> # the path to access on the HTTP server (default: /)
tcp_socket: # specifies a port which must be ready to receive traffic (only one of http_get, tcp_socket, and exec may be specified)
port: <int|string> # the port to access on the container (required)
exec: # specifies a command to run which must exit with code 0 (only one of http_get, tcp_socket, and exec may be specified)
command: <list[string]> # the command to execute inside the container, which is exec'd (not run inside a shell); the working directory is root ('/') in the container's filesystem (required)
initial_delay_seconds: <int> # number of seconds after the container has started before the probe is initiated (default: 0)
timeout_seconds: <int> # number of seconds until the probe times out (default: 1)
period_seconds: <int> # how often (in seconds) to perform the probe (default: 10)
success_threshold: <int> # minimum consecutive successes for the probe to be considered successful after having failed (default: 1)
failure_threshold: <int> # minimum consecutive failures for the probe to be considered failed after having succeeded (default: 3)
node_groups: <list[string]> # a list of node groups on which this API can run (default: all node groups are eligible)
networking: # networking configuration (default: see below)
endpoint: <string> # endpoint for the API (default: <api_name>)
Last updated