Example

Expose multiple RealtimeAPIs as a single endpoint for A/B tests, multi-armed bandits, or canary deployments.

Deploy APIs

import cortex

class Handler:
    """Request handler backed by a transformers text-generation pipeline."""

    def __init__(self, config):
        """Create the text-generation pipeline and keep it on ``self.model``.

        ``config`` is accepted but not read by this constructor.
        """
        # Imported inside the constructor, so transformers is only required
        # at the moment a Handler is instantiated.
        import transformers

        self.model = transformers.pipeline(task="text-generation")

    def handle_post(self, payload):
        """Run the model on ``payload["text"]`` and return its first result."""
        prompt = payload["text"]
        generations = self.model(prompt)
        return generations[0]

# Python packages passed along with each deployment below.
requirements = ["tensorflow", "transformers"]

# Spec for the RealtimeAPI that requests one CPU.
api_spec_cpu = dict(
    name="text-generator-cpu",
    kind="RealtimeAPI",
    compute=dict(cpu=1),
)

# Spec for the RealtimeAPI that requests one GPU.
api_spec_gpu = dict(
    name="text-generator-gpu",
    kind="RealtimeAPI",
    compute=dict(gpu=1),
)

cx = cortex.client("cortex")

# Deploy the CPU API first, then the GPU API, both using the same handler
# class and requirements list.
for spec in (api_spec_cpu, api_spec_gpu):
    cx.deploy_realtime_api(spec, handler=Handler, requirements=requirements)

Deploy a traffic splitter

Update the weights of the traffic splitter

Last updated