Resolve merge conflicts and fix issues with setup.sh

2025-08-20 21:34:13 -07:00 · 2022-10-20 16:09:12 +02:00 · 2022-10-20 16:09:12 +02:00 · 2a91018792
commit 2a91018792
parent 4f936c3049 31d2349dbb
9 changed files with 115 additions and 92 deletions
--- a/setup.sh
+++ b/setup.sh
@@ -1,9 +1,15 @@
 #!/usr/bin/env bash

-if [ -f config.env ]; then
-    echo "config.env already exists, skipping"
-    echo "Please delete config.env if you want to re-run this script"
-    exit 1
+if [ -f .env ]; then
+    read -rp ".env already exists, do you want to delete .env and recreate it? [y/n] " DELETE
+    if [[ ${DELETE:-y} =~ ^[Yy]$ ]]
+    then
+      echo "Deleting .env"
+      rm .env
+    else
+      echo "Exiting"
+      exit 0
+    fi;
 fi

 function check_dep(){
@@ -17,6 +23,34 @@ check_dep curl
 check_dep zstd
 check_dep docker

+# Read number of GPUs
+read -rp "Enter number of GPUs [1]: " NUM_GPUS
+NUM_GPUS=${NUM_GPUS:-1}
+
+read -rp "External port for the API [5000]: " API_EXTERNAL_PORT
+API_EXTERNAL_PORT=${API_EXTERNAL_PORT:-5000}
+
+read -rp "Address for Triton [triton]: " TRITON_HOST
+TRITON_HOST=${TRITON_HOST:-triton}
+
+read -rp "Port of Triton host [8001]: " TRITON_PORT
+TRITON_PORT=${TRITON_PORT:-8001}
+
+# Read model directory
+read -rp "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
+if [ -z "$MODEL_DIR" ]; then
+    MODEL_DIR="$(pwd)/models"
+else
+    MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
+fi
+
+# Write .env
+echo "NUM_GPUS=${NUM_GPUS}" >> .env
+echo "MODEL_DIR=${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu" >> .env
+echo "API_EXTERNAL_PORT=${API_EXTERNAL_PORT}" >> .env
+echo "TRITON_HOST=${TRITON_HOST}" >> .env
+echo "TRITON_PORT=${TRITON_PORT}" >> .env
+echo "GPUS=$(seq 0 $(( NUM_GPUS - 1)) | paste -s -d ',' -)" >> .env

 function fastertransformer_backend(){
    echo "Models available:"
@@ -29,7 +63,7 @@ function fastertransformer_backend(){
    echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
    echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
    # Read their choice
-    read -p "Enter your choice [6]: " MODEL_NUM
+    read -rp "Enter your choice [6]: " MODEL_NUM

    # Convert model number to model name
    case $MODEL_NUM in
@@ -44,36 +78,29 @@ function fastertransformer_backend(){
        *) MODEL="codegen-6B-multi" ;;
    esac

-    # Read number of GPUs
-    read -p "Enter number of GPUs [1]: " NUM_GPUS
-    NUM_GPUS=${NUM_GPUS:-1}
+    echo "MODEL=${MODEL}" > .env

-    # Read model directory
-    read -p "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
-    if [ -z "$MODEL_DIR" ]; then
-        MODEL_DIR="$(pwd)/models"
+    if (test -d "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ); then
+      echo "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
+      echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
+      read -rp "Do you want to re-use it? y/n: " REUSE_CHOICE
+      if [[ ${REUSE_CHOICE:-y} =~ ^[Yy]$ ]]
+      then
+        DOWNLOAD_MODEL=n
+        echo "Re-using model"
+      else
+        DOWNLOAD_MODEL=y
+        rm -rf "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
+      fi
    else
-        MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
+      DOWNLOAD_MODEL=y
    fi

-    # Write config.env
-    echo "MODEL=${MODEL}" > config.env
-    echo "NUM_GPUS=${NUM_GPUS}" >> config.env
-    echo "MODEL_DIR=${MODEL_DIR}" >> config.env
-
-    if [ -d "$MODEL_DIR"/"${MODEL}"-${NUM_GPUS}gpu ]; then
-        echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
-        read -p "Do you want to re-use it? y/n: " REUSE_CHOICE
-        if [ "${REUSE_CHOICE^^}" = "Y" ]; then
-            exit 0
-        fi
-    fi
-
-    # Create model directory
-    mkdir -p "${MODEL_DIR}"
-
-    # For some of the models we can download it preconverted.
-    if [ $NUM_GPUS -le 2 ]; then
+    if [[ ${DOWNLOAD_MODEL:-y} =~ ^[Yy]$ ]]
+    then
+      # Create model directory
+      mkdir -p "${MODEL_DIR}"
+      if [ "$NUM_GPUS" -le 2 ]; then
        echo "Downloading the model from HuggingFace, this will take a while..."
        SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
        DEST="${MODEL}-${NUM_GPUS}gpu"
@@ -83,11 +110,11 @@ function fastertransformer_backend(){
            -o "$ARCHIVE"
        zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
        rm -f "$ARCHIVE"
-    else
+      else
        echo "Downloading and converting the model, this will take a while..."
-        docker run --rm -v ${MODEL_DIR}:/models -e MODEL=${MODEL} -e NUM_GPUS=${NUM_GPUS} moyix/model_converter:latest
+        docker run --rm -v "${MODEL_DIR}":/models -e MODEL=${MODEL} -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
+      fi
    fi
-    echo "Done! Now run ./launch.sh to start the FauxPilot server."
 }

 function python_backend(){
@@ -96,12 +123,8 @@ function python_backend(){
    echo "[2] codegen-350M-multi (1GB total VRAM required; multi-language)"
    echo "[3] codegen-2B-mono (4GB total VRAM required; Python-only)"
    echo "[4] codegen-2B-multi (4GB total VRAM required; multi-language)"
-    # echo "[5] codegen-6B-mono (13GB total VRAM required; Python-only)"
-    # echo "[6] codegen-6B-multi (13GB total VRAM required; multi-language)"
-    # echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
-    # echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
-    # Read their choice
-    read -p "Enter your choice [4]: " MODEL_NUM
+
+    read -rp "Enter your choice [4]: " MODEL_NUM

    # Convert model number to model name
    case $MODEL_NUM in
@@ -109,36 +132,22 @@ function python_backend(){
        2) MODEL="codegen-350M-multi"; ORG="Salesforce" ;;
        3) MODEL="codegen-2B-mono"; ORG="Salesforce" ;;
        4) MODEL="codegen-2B-multi"; ORG="Salesforce" ;;
+        *) MODEL="codegen-2B-multi"; ORG="Salesforce" ;;
    esac

-    # Read number of GPUs -- not strictly required for python backend, because of device_map="auto",
-    # but docker-compose.py uses it to select CUDA_VISIBLE_DEVICES
-    read -p "Enter number of GPUs [1]: " NUM_GPUS
-    NUM_GPUS=${NUM_GPUS:-1}
-
-    # Read model directory
-    read -p "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
-    MODEL_DIR=${MODEL_DIR:-$(pwd)/models}
-    if [ -z "$MODEL_DIR" ]; then
-        MODEL_DIR="$(pwd)/models"
-    else
-        MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
-    fi
-
    # share huggingface cache? Should be safe to share, but permission issues may arise depending upon your docker setup
-    read -p "Do you want to share your huggingface cache between host and docker container? y/n [n]: " SHARE_HF_CACHE
+    read -rp "Do you want to share your huggingface cache between host and docker container? y/n [n]: " SHARE_HF_CACHE
    SHARE_HF_CACHE=${SHARE_HF_CACHE:-n}
-    if [ "${SHARE_HF_CACHE^^}" = "Y" ]; then
-        read -p "Enter your huggingface cache directory [$HOME/.cache/huggingface]: " HF_CACHE_DIR
+    if [[ ${SHARE_HF_CACHE:-y} =~ ^[Yy]$ ]]; then
+        read -rp "Enter your huggingface cache directory [$HOME/.cache/huggingface]: " HF_CACHE_DIR
        HF_CACHE_DIR=${HF_CACHE_DIR:-$HOME/.cache/huggingface}
    else
        HF_CACHE_DIR="/tmp/hf_cache"
    fi

    # use int8? Allows larger models to fit in GPU but might be very marginally slower
-    read -p "Do you want to use int8? y/n [y]: " USE_INT8
-    USE_INT8=${USE_INT8:-y}
-    if [ "${USE_INT8^^}" = "N" ]; then
+    read -rp "Do you want to use int8? y/n [y]: " USE_INT8
+    if [[ ${USE_INT8:-y} =~ ^[Nn]$ ]]; then
        USE_INT8="0"
    else
        USE_INT8="1"
@@ -146,25 +155,31 @@ function python_backend(){

    # Write config.env
    echo "MODEL=py-${MODEL}" > config.env
-    echo "NUM_GPUS=${NUM_GPUS}" >> config.env
-    echo "MODEL_DIR=${MODEL_DIR}" >> config.env
    echo "HF_CACHE_DIR=${HF_CACHE_DIR}" >> config.env

    # Create model directory
    mkdir -p "${MODEL_DIR}/"
+
    python3 ./python_backend/init_model.py --model_name "${MODEL}" --org_name "${ORG}" --model_dir "${MODEL_DIR}" --use_int8 "${USE_INT8}"
-    
-    echo "Done! Now run ./launch.sh to start the FauxPilot server."
 }

 # choose backend
 echo "Choose your backend:"
 echo "[1] FasterTransformer backend (faster, but limited models)"
 echo "[2] Python backend (slower, but more models, and allows loading with int8)"
-read -p "Enter your choice [1]: " BACKEND_NUM
+read -rp "Enter your choice [1]: " BACKEND_NUM

-if [ $BACKEND_NUM -eq 2 ]; then
+if [ "$BACKEND_NUM" -eq 2 ]; then
    python_backend
 else
    fastertransformer_backend
-fi
+fi
+
+read -rp "Config complete, do you want to run FauxPilot? [y/n] " RUN
+if [[ ${RUN:-y} =~ ^[Yy]$ ]]
+then
+  bash ./launch.sh
+else
+  echo "You can run ./launch.sh to start the FauxPilot server."
+  exit 0
+fi;