Simplify config and port handling

This commit is contained in:
Fred de Gier 2022-10-03 14:13:10 +02:00
commit 87f4f53e27
5 changed files with 70 additions and 45 deletions

View file

@@ -8,7 +8,7 @@ from models import OpenAIinput
from utils.codegen import CodeGenProxy
codegen = CodeGenProxy(
host=os.environ.get("TRITON_HOST", "localhost"),
host=os.environ.get("TRITON_HOST", "triton"),
port=os.environ.get("TRITON_PORT", 8001),
verbose=os.environ.get("TRITON_VERBOSITY", False)
)
@@ -41,4 +41,4 @@ async def completions(data: OpenAIinput):
)
if __name__ == "__main__":
uvicorn.run("app:app", host=os.environ.get("API_HOST", "0.0.0.0"), port=os.environ.get("API_PORT", 5000))
uvicorn.run("app:app", host="0.0.0.0", port=5000)

View file

@@ -13,7 +13,7 @@ np.finfo(np.dtype("float64"))
class CodeGenProxy:
def __init__(self, host: str = 'localhost', port: int = 8001, verbose: bool = False):
def __init__(self, host: str = 'triton', port: int = 8001, verbose: bool = False):
self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
self.PAD_CHAR = 50256