mirror of
https://github.com/fauxpilot/fauxpilot.git
synced 2025-07-08 05:51:26 -07:00
Simplify config and port handling
This commit is contained in:
parent
6f49915d2a
commit
87f4f53e27
5 changed files with 70 additions and 45 deletions
|
@ -8,7 +8,7 @@ from models import OpenAIinput
|
||||||
from utils.codegen import CodeGenProxy
|
from utils.codegen import CodeGenProxy
|
||||||
|
|
||||||
codegen = CodeGenProxy(
|
codegen = CodeGenProxy(
|
||||||
host=os.environ.get("TRITON_HOST", "localhost"),
|
host=os.environ.get("TRITON_HOST", "triton"),
|
||||||
port=os.environ.get("TRITON_PORT", 8001),
|
port=os.environ.get("TRITON_PORT", 8001),
|
||||||
verbose=os.environ.get("TRITON_VERBOSITY", False)
|
verbose=os.environ.get("TRITON_VERBOSITY", False)
|
||||||
)
|
)
|
||||||
|
@ -41,4 +41,4 @@ async def completions(data: OpenAIinput):
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
uvicorn.run("app:app", host=os.environ.get("API_HOST", "0.0.0.0"), port=os.environ.get("API_PORT", 5000))
|
uvicorn.run("app:app", host="0.0.0.0", port=5000)
|
||||||
|
|
|
@ -13,7 +13,7 @@ np.finfo(np.dtype("float64"))
|
||||||
|
|
||||||
|
|
||||||
class CodeGenProxy:
|
class CodeGenProxy:
|
||||||
def __init__(self, host: str = 'localhost', port: int = 8001, verbose: bool = False):
|
def __init__(self, host: str = 'triton', port: int = 8001, verbose: bool = False):
|
||||||
self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
|
self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
|
||||||
self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
|
self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
|
||||||
self.PAD_CHAR = 50256
|
self.PAD_CHAR = 50256
|
||||||
|
|
|
@ -23,8 +23,7 @@ services:
|
||||||
# build:
|
# build:
|
||||||
# context: .
|
# context: .
|
||||||
# dockerfile: copilot_proxy/Dockerfile
|
# dockerfile: copilot_proxy/Dockerfile
|
||||||
command: uvicorn app:app --host $API_HOST --port $API_PORT
|
|
||||||
env_file:
|
env_file:
|
||||||
- .env
|
- .env
|
||||||
ports:
|
ports:
|
||||||
- "5001:${API_PORT}"
|
- "${API_EXTERNAL_PORT}:5000"
|
||||||
|
|
13
launch.sh
13
launch.sh
|
@ -1,11 +1,11 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
# Read in config.env file; error if not found
|
# Read in .env file; error if not found
|
||||||
if [ ! -f config.env ]; then
|
if [ ! -f .env ]; then
|
||||||
echo "config.env not found, please run setup.sh"
|
echo ".env not found, running setup.sh"
|
||||||
exit 1
|
bash setup.sh
|
||||||
fi
|
fi
|
||||||
source config.env
|
source .env
|
||||||
|
|
||||||
# On newer versions, docker-compose is docker compose
|
# On newer versions, docker-compose is docker compose
|
||||||
DOCKER_COMPOSE=$(command -v docker-compose)
|
DOCKER_COMPOSE=$(command -v docker-compose)
|
||||||
|
@ -13,7 +13,4 @@ if [ -z "$DOCKER_COMPOSE" ]; then
|
||||||
DOCKER_COMPOSE="docker compose"
|
DOCKER_COMPOSE="docker compose"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export NUM_GPUS=${NUM_GPUS}
|
|
||||||
export MODEL_DIR="${MODEL_DIR}"/"${MODEL}-${NUM_GPUS}gpu"
|
|
||||||
export GPUS=$(seq 0 $(( NUM_GPUS - 1 )) | paste -sd ',')
|
|
||||||
$DOCKER_COMPOSE up
|
$DOCKER_COMPOSE up
|
||||||
|
|
93
setup.sh
93
setup.sh
|
@ -1,9 +1,15 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
if [ -f config.env ]; then
|
if [ -f .env ]; then
|
||||||
echo "config.env already exists, skipping"
|
read -rp ".env already exists, do you want to delete .env and recreate it? [y/n] " DELETE
|
||||||
echo "Please delete config.env if you want to re-run this script"
|
if [[ ${DELETE:-y} =~ ^[Yy]$ ]]
|
||||||
exit 0
|
then
|
||||||
|
echo "Deleting .env"
|
||||||
|
rm .env
|
||||||
|
else
|
||||||
|
echo "Exiting"
|
||||||
|
exit 0
|
||||||
|
fi;
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Models available:"
|
echo "Models available:"
|
||||||
|
@ -16,7 +22,7 @@ echo "[6] codegen-6B-multi (13GB total VRAM required; multi-language)"
|
||||||
echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
|
echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
|
||||||
echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
|
echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
|
||||||
# Read their choice
|
# Read their choice
|
||||||
read -p "Enter your choice [6]: " MODEL_NUM
|
read -rp "Enter your choice [6]: " MODEL_NUM
|
||||||
|
|
||||||
# Convert model number to model name
|
# Convert model number to model name
|
||||||
case $MODEL_NUM in
|
case $MODEL_NUM in
|
||||||
|
@ -32,45 +38,68 @@ case $MODEL_NUM in
|
||||||
esac
|
esac
|
||||||
|
|
||||||
# Read number of GPUs
|
# Read number of GPUs
|
||||||
read -p "Enter number of GPUs [1]: " NUM_GPUS
|
read -rp "Enter number of GPUs [1]: " NUM_GPUS
|
||||||
NUM_GPUS=${NUM_GPUS:-1}
|
NUM_GPUS=${NUM_GPUS:-1}
|
||||||
|
|
||||||
|
read -rp "External port for the API [5000]: " API_EXTERNAL_PORT
|
||||||
|
API_EXTERNAL_PORT=${API_EXTERNAL_PORT:-5000}
|
||||||
|
|
||||||
|
read -rp "Address for Triton [triton]: " TRITON_HOST
|
||||||
|
TRITON_HOST=${TRITON_HOST:-triton}
|
||||||
|
|
||||||
|
read -rp "Port of Triton host [8001]: " TRITON_PORT
|
||||||
|
TRITON_PORT=${TRITON_PORT:-8001}
|
||||||
|
|
||||||
# Read model directory
|
# Read model directory
|
||||||
read -p "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
|
read -rp "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
|
||||||
if [ -z "$MODEL_DIR" ]; then
|
if [ -z "$MODEL_DIR" ]; then
|
||||||
MODEL_DIR="$(pwd)/models"
|
MODEL_DIR="$(pwd)/models"
|
||||||
else
|
else
|
||||||
MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
|
MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Write config.env
|
# Write .env
|
||||||
echo "MODEL=${MODEL}" > config.env
|
echo "MODEL=${MODEL}" > .env
|
||||||
echo "NUM_GPUS=${NUM_GPUS}" >> config.env
|
echo "NUM_GPUS=${NUM_GPUS}" >> .env
|
||||||
echo "MODEL_DIR=${MODEL_DIR}" >> config.env
|
echo "MODEL_DIR=${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu" >> .env
|
||||||
|
echo "API_EXTERNAL_PORT=${API_EXTERNAL_PORT}" >> .env
|
||||||
|
echo "TRITON_HOST=${TRITON_HOST}" >> .env
|
||||||
|
echo "TRITON_PORT=${TRITON_PORT}" >> .env
|
||||||
|
GPUS="$(seq -s ',' -t '\n' "${NUM_GPUS}")"
|
||||||
|
echo "GPUS=${GPUS%?}" >> .env
|
||||||
|
|
||||||
if [ -d "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ]; then
|
if [ -d "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ]; then
|
||||||
echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
|
echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
|
||||||
read -p "Do you want to re-use it? y/n: " REUSE_CHOICE
|
read -rp "Do you want to re-use it? y/n: " REUSE_CHOICE
|
||||||
if ${REUSE_CHOICE^^} != "Y"; then
|
if [ "${REUSE_CHOICE^^}" = "Y" ]; then
|
||||||
# Create model directory
|
exit 0
|
||||||
mkdir -p "${MODEL_DIR}"
|
|
||||||
|
|
||||||
# For some of the models we can download it pre-converted.
|
|
||||||
if [ "$NUM_GPUS" -le 2 ]; then
|
|
||||||
echo "Downloading the model from HuggingFace, this will take a while..."
|
|
||||||
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
|
|
||||||
DEST="${MODEL}-${NUM_GPUS}gpu"
|
|
||||||
ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
|
|
||||||
cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
|
|
||||||
curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
|
|
||||||
-o "$ARCHIVE"
|
|
||||||
zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
|
|
||||||
rm -f "$ARCHIVE"
|
|
||||||
else
|
|
||||||
echo "Downloading and converting the model, this will take a while..."
|
|
||||||
docker run --rm -v "${MODEL_DIR}":/models -e MODEL="${MODEL}" -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Done! Now run ./launch.sh to start the FauxPilot server."
|
# Create model directory
|
||||||
|
mkdir -p "${MODEL_DIR}"
|
||||||
|
|
||||||
|
# For some of the models we can download it pre-converted.
|
||||||
|
if [ $NUM_GPUS -le 2 ]; then
|
||||||
|
echo "Downloading the model from HuggingFace, this will take a while..."
|
||||||
|
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
|
||||||
|
DEST="${MODEL}-${NUM_GPUS}gpu"
|
||||||
|
ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
|
||||||
|
cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
|
||||||
|
curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
|
||||||
|
-o "$ARCHIVE"
|
||||||
|
zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
|
||||||
|
rm -f "$ARCHIVE"
|
||||||
|
else
|
||||||
|
echo "Downloading and converting the model, this will take a while..."
|
||||||
|
docker run --rm -v "${MODEL_DIR}":/models -e MODEL=${MODEL} -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
|
||||||
|
fi
|
||||||
|
|
||||||
|
read -rp "Config complete, do you want to run FauxPilot? [y/n]" RUN
|
||||||
|
if [[ ${RUN:-y} =~ ^[Yy]$ ]]
|
||||||
|
then
|
||||||
|
bash ./launch.sh
|
||||||
|
else
|
||||||
|
echo "You can run ./launch.sh to start the FauxPilot server."
|
||||||
|
exit 0
|
||||||
|
fi;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue