mirror of
https://github.com/fauxpilot/fauxpilot.git
synced 2025-08-20 13:24:27 -07:00
Simplify config and port handling
This commit is contained in:
parent
6f49915d2a
commit
87f4f53e27
5 changed files with 70 additions and 45 deletions
|
@@ -8,7 +8,7 @@ from models import OpenAIinput
|
|||
from utils.codegen import CodeGenProxy
|
||||
|
||||
codegen = CodeGenProxy(
|
||||
host=os.environ.get("TRITON_HOST", "localhost"),
|
||||
host=os.environ.get("TRITON_HOST", "triton"),
|
||||
port=os.environ.get("TRITON_PORT", 8001),
|
||||
verbose=os.environ.get("TRITON_VERBOSITY", False)
|
||||
)
|
||||
|
@@ -41,4 +41,4 @@ async def completions(data: OpenAIinput):
|
|||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run("app:app", host=os.environ.get("API_HOST", "0.0.0.0"), port=os.environ.get("API_PORT", 5000))
|
||||
uvicorn.run("app:app", host="0.0.0.0", port=5000)
|
||||
|
|
|
@@ -13,7 +13,7 @@ np.finfo(np.dtype("float64"))
|
|||
|
||||
|
||||
class CodeGenProxy:
|
||||
def __init__(self, host: str = 'localhost', port: int = 8001, verbose: bool = False):
|
||||
def __init__(self, host: str = 'triton', port: int = 8001, verbose: bool = False):
|
||||
self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
|
||||
self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
|
||||
self.PAD_CHAR = 50256
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue