Some minor ergonomic changes for python backend

- Add a validation rule to ensure `model` is set to fastertransformer or python-backend
- Add a warning if the model is unavailable; most likely the user has not set `model` correctly

Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
This commit is contained in:
Parth Thakkar 2023-01-02 18:54:51 +05:30
parent 8df5058c5c
commit 4bf40cdb6c
3 changed files with 14 additions and 7 deletions

View file

@ -78,13 +78,13 @@ def load_test_env():
return env
def run_inference(
prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
prompt: str, model: str = "py-model", port: int = 5000, return_all: bool = False,
**kwargs
) -> Union[str, Dict]:
"Invokes the copilot proxy with the given prompt and returns the completion"
endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
data = {
"model": model_name,
"model": model,
"prompt": prompt,
"suffix": kwargs.get("suffix", ""),
"max_tokens": kwargs.get("max_tokens", 16),