Simplify config and port handling

2025-07-08 05:51:26 -07:00 · 2022-10-03 14:13:10 +02:00 · 2022-10-03 14:13:10 +02:00 · 87f4f53e27
commit 87f4f53e27
parent 6f49915d2a
5 changed files with 70 additions and 45 deletions
--- a/copilot_proxy/app.py
+++ b/copilot_proxy/app.py
@@ -8,7 +8,7 @@ from models import OpenAIinput
 from utils.codegen import CodeGenProxy
 codegen = CodeGenProxy(
-    host=os.environ.get("TRITON_HOST", "localhost"),
+    host=os.environ.get("TRITON_HOST", "triton"),
    port=os.environ.get("TRITON_PORT", 8001),
    verbose=os.environ.get("TRITON_VERBOSITY", False)
 )
@@ -41,4 +41,4 @@ async def completions(data: OpenAIinput):
        )
 if __name__ == "__main__":
-    uvicorn.run("app:app", host=os.environ.get("API_HOST", "0.0.0.0"), port=os.environ.get("API_PORT", 5000))
+    uvicorn.run("app:app", host="0.0.0.0", port=5000)
--- a/copilot_proxy/utils/codegen.py
+++ b/copilot_proxy/utils/codegen.py
@@ -13,7 +13,7 @@ np.finfo(np.dtype("float64"))
 class CodeGenProxy:
-    def __init__(self, host: str = 'localhost', port: int = 8001, verbose: bool = False):
+    def __init__(self, host: str = 'triton', port: int = 8001, verbose: bool = False):
        self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
        self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
        self.PAD_CHAR = 50256
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -23,8 +23,7 @@ services:
 #    build:
 #      context: .
 #      dockerfile: copilot_proxy/Dockerfile
    command: uvicorn app:app --host $API_HOST --port $API_PORT
    env_file:
      - .env
    ports:
-      - "5001:${API_PORT}"
+      - "${API_EXTERNAL_PORT}:5000"
--- a/launch.sh
+++ b/launch.sh
@@ -1,11 +1,11 @@
 #!/usr/bin/env bash
-# Read in config.env file; error if not found
+# Read in .env file; error if not found
-if [ ! -f config.env ]; then
+if [ ! -f .env ]; then
-    echo "config.env not found, please run setup.sh"
+    echo ".env not found, running setup.sh"
-    exit 1
+    bash setup.sh
 fi
-source config.env
+source .env
 # On newer versions, docker-compose is docker compose
 DOCKER_COMPOSE=$(command -v docker-compose)
@@ -13,7 +13,4 @@ if [ -z "$DOCKER_COMPOSE" ]; then
    DOCKER_COMPOSE="docker compose"
 fi
 export NUM_GPUS=${NUM_GPUS}
 export MODEL_DIR="${MODEL_DIR}"/"${MODEL}-${NUM_GPUS}gpu"
 export GPUS=$(seq 0 $(( NUM_GPUS - 1 )) | paste -sd ',')
 $DOCKER_COMPOSE up
--- a/setup.sh
+++ b/setup.sh
@@ -1,9 +1,15 @@
 #!/bin/bash
-if [ -f config.env ]; then
+if [ -f .env ]; then
-    echo "config.env already exists, skipping"
+    read -rp ".env already exists, do you want to delete .env and recreate it? [y/n] " DELETE
-    echo "Please delete config.env if you want to re-run this script"
+    if [[ ${DELETE:-y} =~ ^[Yy]$ ]]
-    exit 0
+    then
      echo "Deleting .env"
      rm .env
    else
      echo "Exiting"
      exit 0
    fi;
 fi
 echo "Models available:"
@@ -16,7 +22,7 @@ echo "[6] codegen-6B-multi (13GB total VRAM required; multi-language)"
 echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
 echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
 # Read their choice
-read -p "Enter your choice [6]: " MODEL_NUM
+read -rp "Enter your choice [6]: " MODEL_NUM
 # Convert model number to model name
 case $MODEL_NUM in
@@ -32,45 +38,68 @@ case $MODEL_NUM in
 esac
 # Read number of GPUs
-read -p "Enter number of GPUs [1]: " NUM_GPUS
+read -rp "Enter number of GPUs [1]: " NUM_GPUS
 NUM_GPUS=${NUM_GPUS:-1}
 read -rp "External port for the API [5000]: " API_EXTERNAL_PORT
 API_EXTERNAL_PORT=${API_EXTERNAL_PORT:-5000}
 read -rp "Address for Triton [triton]: " TRITON_HOST
 TRITON_HOST=${TRITON_HOST:-triton}
 read -rp "Port of Triton host [8001]: " TRITON_PORT
 TRITON_PORT=${TRITON_PORT:-8001}
 # Read model directory
-read -p "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
+read -rp "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
 if [ -z "$MODEL_DIR" ]; then
    MODEL_DIR="$(pwd)/models"
 else
    MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
 fi
-# Write config.env
+# Write .env
-echo "MODEL=${MODEL}" > config.env
+echo "MODEL=${MODEL}" > .env
-echo "NUM_GPUS=${NUM_GPUS}" >> config.env
+echo "NUM_GPUS=${NUM_GPUS}" >> .env
-echo "MODEL_DIR=${MODEL_DIR}" >> config.env
+echo "MODEL_DIR=${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu" >> .env
 echo "API_EXTERNAL_PORT=${API_EXTERNAL_PORT}" >> .env
 echo "TRITON_HOST=${TRITON_HOST}" >> .env
 echo "TRITON_PORT=${TRITON_PORT}" >> .env
 GPUS="$(seq -s ','  -t '\n' "${NUM_GPUS}")"
 echo "GPUS=${GPUS%?}" >> .env
 if [ -d "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ]; then
    echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
-    read -p "Do you want to re-use it? y/n: " REUSE_CHOICE
+    read -rp "Do you want to re-use it? y/n: " REUSE_CHOICE
-    if ${REUSE_CHOICE^^} != "Y"; then
+    if [ "${REUSE_CHOICE^^}" = "Y" ]; then
-      # Create model directory
+        exit 0
      mkdir -p "${MODEL_DIR}"
      # For some of the models we can download it pre-converted.
      if [ "$NUM_GPUS" -le 2 ]; then
          echo "Downloading the model from HuggingFace, this will take a while..."
          SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
          DEST="${MODEL}-${NUM_GPUS}gpu"
          ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
          cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
          curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
              -o "$ARCHIVE"
          zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
          rm -f "$ARCHIVE"
      else
          echo "Downloading and converting the model, this will take a while..."
          docker run --rm -v "${MODEL_DIR}":/models -e MODEL="${MODEL}" -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
      fi
    fi
 fi
-echo "Done! Now run ./launch.sh to start the FauxPilot server."
+# Create model directory
 mkdir -p "${MODEL_DIR}"
 # For some of the models we can download it pre-converted.
 if [ $NUM_GPUS -le 2 ]; then
    echo "Downloading the model from HuggingFace, this will take a while..."
    SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
    DEST="${MODEL}-${NUM_GPUS}gpu"
    ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
    cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
    curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
        -o "$ARCHIVE"
    zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
    rm -f "$ARCHIVE"
 else
    echo "Downloading and converting the model, this will take a while..."
    docker run --rm -v "${MODEL_DIR}":/models -e MODEL=${MODEL} -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
 fi
 read -rp "Config complete, do you want to run FauxPilot? [y/n]" RUN
 if [[ ${RUN:-y} =~ ^[Yy]$ ]]
 then
  bash ./launch.sh
 else
  echo "You can run ./launch.sh to start the FauxPilot server."
  exit 0
 fi;