Mirror of https://github.com/fauxpilot/fauxpilot.git
Some minor ergonomic changes for python backend
- Add validation rule to ensure `model` is set to fastertransformer or py-model
- Add warning if model is unavailable, likely the user has not set `model` correctly

Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
parent 8df5058c5c
commit 4bf40cdb6c

3 changed files with 14 additions and 7 deletions
@@ -1,10 +1,10 @@
 from typing import Optional, Union

-from pydantic import BaseModel
+from pydantic import BaseModel, constr


 class OpenAIinput(BaseModel):
-    model: str = "fastertransformer"
+    model: constr(regex="^(fastertransformer|py-model)$") = "fastertransformer"
     prompt: Optional[str]
     suffix: Optional[str]
     max_tokens: Optional[int] = 16
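The constr pattern above means pydantic now rejects any `model` value other than the two supported backends at request-parsing time, instead of passing a bad string through to Triton. A minimal sketch of that behavior, assuming pydantic v1 (whose constr takes a regex keyword, as in the diff):

from pydantic import BaseModel, ValidationError, constr

class OpenAIinput(BaseModel):
    model: constr(regex="^(fastertransformer|py-model)$") = "fastertransformer"

print(OpenAIinput().model)                  # default: 'fastertransformer'
print(OpenAIinput(model="py-model").model)  # allowed by the regex

try:
    OpenAIinput(model="gpt2")               # any other value fails validation
except ValidationError as err:
    print(err)                              # 'string does not match regex ...'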
@@ -97,7 +97,7 @@ class CodeGenProxy:
         output_len = np.ones_like(input_len).astype(np_type) * max_tokens
         num_logprobs = data.get('logprobs', -1)
         if num_logprobs is None:
-            num_logprobs = 1
+            num_logprobs = -1
         want_logprobs = num_logprobs > 0

         temperature = data.get('temperature', 0.2)
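Note the behavioral fix in this hunk: a request carrying an explicit `logprobs: null` used to flip num_logprobs to 1 and silently enable logprob reporting; it now falls back to -1, the same as omitting the field entirely. A small trace of the fixed branch:

data = {'logprobs': None}                 # field present but null
num_logprobs = data.get('logprobs', -1)   # -> None; the default is not used
if num_logprobs is None:
    num_logprobs = -1                     # was 1 before this commit
want_logprobs = num_logprobs > 0
print(want_logprobs)                      # False -- logprobs stay disabled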
@@ -246,8 +246,15 @@ class CodeGenProxy:
         st = time.time()
         try:
             completion, choices = self.generate(data)
-        except InferenceServerException as E:
-            print(E)
+        except InferenceServerException as exc:
+            # status: unavailable -- this happens if the `model` string is invalid
+            print(exc)
+            if exc.status() == 'StatusCode.UNAVAILABLE':
+                print(
+                    f"WARNING: Model '{data['model']}' is not available. Please ensure that "
+                    "`model` is set to either 'fastertransformer' or 'py-model' depending on "
+                    "your installation"
+                )
             completion = {}
             choices = []
         ed = time.time()
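The widened except branch turns Triton's "model unavailable" error into an actionable warning rather than a bare exception dump, while still returning an empty completion. A self-contained sketch of that control flow, using a hypothetical stub in place of tritonclient's InferenceServerException (only the status() call shown in the diff is assumed):

class InferenceServerException(Exception):
    """Stub standing in for tritonclient.utils.InferenceServerException."""
    def __init__(self, msg, status=None):
        super().__init__(msg)
        self._status = status

    def status(self):
        return self._status

data = {'model': 'python-backend'}  # a value Triton cannot route to a model
try:
    raise InferenceServerException("unknown model", status='StatusCode.UNAVAILABLE')
except InferenceServerException as exc:
    print(exc)
    if exc.status() == 'StatusCode.UNAVAILABLE':
        print(f"WARNING: Model '{data['model']}' is not available. Please ensure that "
              "`model` is set to either 'fastertransformer' or 'py-model' depending on "
              "your installation")
    completion, choices = {}, []    # the request still yields an empty completion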
@@ -78,13 +78,13 @@ def load_test_env():
     return env

 def run_inference(
-        prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
+        prompt: str, model: str = "py-model", port: int = 5000, return_all: bool = False,
         **kwargs
 ) -> Union[str, Dict]:
     "Invokes the copilot proxy with the given prompt and returns the completion"
     endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
     data = {
-        "model": model_name,
+        "model": model,
         "prompt": prompt,
         "suffix": kwargs.get("suffix", ""),
         "max_tokens": kwargs.get("max_tokens", 16),
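The rename makes the test helper's keyword match the API field it populates. For reference, the request it issues boils down to the following (endpoint and payload keys taken from the diff; a proxy listening on localhost:5000 is assumed):

import requests

response = requests.post(
    "http://localhost:5000/v1/engines/codegen/completions",
    json={
        "model": "py-model",     # must satisfy the constr regex added above
        "prompt": "def fib(n):",
        "suffix": "",
        "max_tokens": 16,
    },
)
print(response.json())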