Create APIs that respond to prediction requests in real time.
$ mkdir text-generator && cd text-generator
$ touch predictor.py requirements.txt text_generator.yaml
# predictor.py

from transformers import pipeline

class PythonPredictor:
    def __init__(self, config):
        # load a pretrained text-generation pipeline once, at API startup
        self.model = pipeline(task="text-generation")

    def predict(self, payload):
        # run inference on the "text" field of the request body
        return self.model(payload["text"])[0]
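Before deploying, it can help to sanity-check the predictor locally. A minimal sketch, assuming transformers and torch are already installed in your environment (the filename and empty config are illustrative):

# local_test.py -- optional sanity check, not required by Cortex
from predictor import PythonPredictor

predictor = PythonPredictor(config={})  # Cortex passes config from the API spec; empty here
print(predictor.predict({"text": "hello world"}))  # prints a dict containing the generated text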
# requirements.txt
transformers
torch
# text_generator.yaml

- name: text-generator
  kind: RealtimeAPI
  predictor:
    type: python
    path: predictor.py
  compute:
    gpu: 1
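Conceptually, a RealtimeAPI wraps the predictor in an HTTP server that forwards each JSON request body to predict(). Cortex manages the actual serving stack for you; the sketch below (using FastAPI purely for illustration, not Cortex's internals) shows the request flow:

# serving_sketch.py -- illustration only; Cortex runs the real server
from fastapi import FastAPI
from predictor import PythonPredictor

app = FastAPI()
predictor = PythonPredictor(config={})  # in Cortex, config comes from the API spec

@app.post("/text-generator")
def handle(payload: dict):
    # each POST body, e.g. {"text": "hello world"}, is passed straight to predict()
    return predictor.predict(payload)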
$ cortex deploy text_generator.yaml
$ cortex get text-generator --watch
$ cortex logs text-generator
$ curl http://***.elb.us-west-2.amazonaws.com/text-generator -X POST -H "Content-Type: application/json" -d '{"text": "hello world"}'
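The same request can be made from Python. A small sketch using the requests library; substitute the load balancer endpoint shown by cortex get (the URL below keeps the placeholder from the curl command):

# query.py -- replace the URL with your API endpoint from `cortex get text-generator`
import requests

endpoint = "http://***.elb.us-west-2.amazonaws.com/text-generator"  # placeholder
response = requests.post(endpoint, json={"text": "hello world"})
print(response.json())  # the generated-text dict returned by the predictor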
$ cortex delete text-generator