mirror of
https://github.com/fauxpilot/fauxpilot.git
synced 2025-08-21 13:54:29 -07:00
Add python backend support
- Modify dockerfile to include bitsandbytes, transformers and latest version of pytorch - Minor modifications in utils/codegen.py so that same client works with FT and Py-backend - Minor modifications in launch.sh (no need to name models by GPU) - Add installation script for adding a new python model (with super simple config_template) - Modify setup.sh so that it works with both FT and Python backend models Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
This commit is contained in:
parent
9b2bc84670
commit
01f1cbb629
9 changed files with 487 additions and 73 deletions
|
@ -1,11 +1,14 @@
|
|||
version: '3.3'
|
||||
services:
|
||||
triton:
|
||||
image: moyix/triton_with_ft:22.09
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
command: bash -c "CUDA_VISIBLE_DEVICES=${GPUS} mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model"
|
||||
shm_size: '2gb'
|
||||
volumes:
|
||||
- ${MODEL_DIR}:/model
|
||||
- ${HF_CACHE_DIR}:/root/.cache/huggingface
|
||||
ports:
|
||||
- "8000:8000"
|
||||
- "8001:8001"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue