Mirror of https://github.com/fauxpilot/fauxpilot.git (synced 2025-08-14 10:37:43 -07:00)
Resolve merge conflicts and fix issues with setup.sh

Commit 2a91018792 (9 changed files with 115 additions and 92 deletions)
@@ -10,4 +10,4 @@ COPY copilot_proxy .
 
 EXPOSE 5000
 
-CMD [ "uvicorn", "--host", "0.0.0.0", "--port", "5000", "app:app"]
+CMD ["uvicorn", "--host", "0.0.0.0", "--port", "5000", "app:app"]
@@ -41,4 +41,4 @@ async def completions(data: OpenAIinput):
     )
 
 if __name__ == "__main__":
-    uvicorn.run("app:app", host=os.environ.get("API_HOST", "0.0.0.0"), port=os.environ.get("API_PORT", 5000))
+    uvicorn.run("app:app", host="0.0.0.0", port=5000)
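Note on the hunk above: the removed call read host and port from the environment, while the resolved version hardcodes them. If the env-driven form is wanted back, one pitfall is that os.environ.get() returns strings. A minimal sketch, assuming the same app:app entry point (the int() cast is my addition, not part of the commit):

import os

import uvicorn

if __name__ == "__main__":
    # Sketch only: an env-configurable variant of the hardcoded call above.
    # os.environ.get() returns a string, so the port is cast to int before
    # being handed to uvicorn.
    uvicorn.run(
        "app:app",
        host=os.environ.get("API_HOST", "0.0.0.0"),
        port=int(os.environ.get("API_PORT", "5000")),
    )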
@@ -4,7 +4,7 @@ from pydantic import BaseModel
 
 
 class OpenAIinput(BaseModel):
-    model: str = "fastertransformer|py-model"
+    model: str = "fastertransformer"
     prompt: Optional[str]
     suffix: Optional[str]
     max_tokens: Optional[int] = 16
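For reference, the request schema from the hunk above as a runnable snippet, plus a usage line. This assumes pydantic v1 semantics, where Optional fields without an explicit default are treated as defaulting to None:

from typing import Optional

from pydantic import BaseModel


class OpenAIinput(BaseModel):
    model: str = "fastertransformer"
    prompt: Optional[str]   # defaults to None under pydantic v1
    suffix: Optional[str]
    max_tokens: Optional[int] = 16


# Fields omitted from the payload fall back to their defaults.
req = OpenAIinput(prompt="def hello_world():")
print(req.model, req.max_tokens)  # -> fastertransformer 16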
@@ -6,14 +6,14 @@ import time
 import numpy as np
 import tritonclient.grpc as client_util
 from tokenizers import Tokenizer
-from tritonclient.utils import np_to_triton_dtype
+from tritonclient.utils import np_to_triton_dtype, InferenceServerException
 
 np.finfo(np.dtype("float32"))
 np.finfo(np.dtype("float64"))
 
 
 class CodeGenProxy:
-    def __init__(self, host: str = 'localhost', port: int = 8001, verbose: bool = False):
+    def __init__(self, host: str = 'triton', port: int = 8001, verbose: bool = False):
         self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
         self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
         self.PAD_CHAR = 50256
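The new default host 'triton' looks like a docker-compose service name resolved over the compose network's DNS (an assumption; the hunk itself only shows the default changing from 'localhost'). A small connectivity sketch against the same tritonclient gRPC API:

import tritonclient.grpc as client_util

# 'triton:8001' assumes resolution via the Docker network's DNS; from the
# host machine, substitute localhost or the published address instead.
client = client_util.InferenceServerClient(url="triton:8001", verbose=False)
if client.is_server_live():
    print("Triton gRPC endpoint is live")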
@@ -234,7 +234,12 @@ class CodeGenProxy:
 
     def __call__(self, data: dict):
         st = time.time()
-        completion, choices = self.generate(data)
+        try:
+            completion, choices = self.generate(data)
+        except InferenceServerException as E:
+            print(E)
+            completion = {}
+            choices = []
         ed = time.time()
         print(f"Returned completion in {(ed - st) * 1000} ms")
         if data.get('stream', False):
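With the try/except in place, a Triton failure degrades to an empty completion and an empty choices list rather than an unhandled exception. A hypothetical stub exercising that fallback contract; FlakyProxy is illustrative and not part of the commit:

from tritonclient.utils import InferenceServerException


class FlakyProxy:
    # Illustrative stand-in for CodeGenProxy whose backend always fails.
    def generate(self, data):
        raise InferenceServerException("triton unreachable")

    def __call__(self, data: dict):
        try:
            completion, choices = self.generate(data)
        except InferenceServerException as e:
            print(e)
            completion, choices = {}, []
        return completion, choices


print(FlakyProxy()({"prompt": "def f():"}))  # -> ({}, [])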