Add python backend support

- Modify dockerfile to include bitsandbytes, transformers and latest version of pytorch
- Minor modifications in utils/codegen.py so that same client works with FT and Py-backend
- Minor modifications in launch.sh (no need to name models by GPU)
- Add installation script for adding a new python model (with super simple config_template)
- Modify setup.sh so that it works with both FT and Python backend models

Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
2025-08-21 13:54:29 -07:00 · 2022-10-16 22:05:00 -05:00 · 2022-10-16 22:05:00 -05:00 · 01f1cbb629
commit 01f1cbb629
parent 9b2bc84670
9 changed files with 487 additions and 73 deletions
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,11 +1,14 @@
 version: '3.3'
 services:
  triton:
-    image: moyix/triton_with_ft:22.09
+    build:
+      context: .
+      dockerfile: Dockerfile
    command: bash -c "CUDA_VISIBLE_DEVICES=${GPUS} mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model"
    shm_size: '2gb'
    volumes:
      - ${MODEL_DIR}:/model
+      - ${HF_CACHE_DIR}:/root/.cache/huggingface
    ports:
      - "8000:8000"
      - "8001:8001"