mirror of https://github.com/fauxpilot/fauxpilot.git
synced 2025-07-15 01:23:25 -07:00

Resolve merge conflicts and fix issues with setup.sh

commit 2a91018792
9 changed files with 115 additions and 92 deletions
copilot_proxy/Dockerfile
@@ -10,4 +10,4 @@ COPY copilot_proxy .
 
 EXPOSE 5000
 
-CMD [ "uvicorn", "--host", "0.0.0.0", "--port", "5000", "app:app"]
+CMD ["uvicorn", "--host", "0.0.0.0", "--port", "5000", "app:app"]
copilot_proxy/app.py
@@ -41,4 +41,4 @@ async def completions(data: OpenAIinput):
     )
 
 if __name__ == "__main__":
-    uvicorn.run("app:app", host=os.environ.get("API_HOST", "0.0.0.0"), port=os.environ.get("API_PORT", 5000))
+    uvicorn.run("app:app", host="0.0.0.0", port=5000)
copilot_proxy/models.py
@@ -4,7 +4,7 @@ from pydantic import BaseModel
 
 
 class OpenAIinput(BaseModel):
-    model: str = "fastertransformer|py-model"
+    model: str = "fastertransformer"
     prompt: Optional[str]
     suffix: Optional[str]
     max_tokens: Optional[int] = 16
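With these defaults, a request only needs the fields it wants to override. As a hedged illustration, a completion request against the proxy could look like the following; the endpoint path is an assumption (the route is defined elsewhere in app.py, not in this hunk), and the payload values are examples only:

    # Illustrative request; route path and values are assumptions, not from this diff.
    curl -s http://localhost:5000/v1/engines/codegen/completions \
      -H 'Content-Type: application/json' \
      -d '{"model": "fastertransformer", "prompt": "def hello():", "max_tokens": 16}'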
copilot_proxy/utils/codegen.py
@@ -6,14 +6,14 @@ import time
 import numpy as np
 import tritonclient.grpc as client_util
 from tokenizers import Tokenizer
-from tritonclient.utils import np_to_triton_dtype
+from tritonclient.utils import np_to_triton_dtype, InferenceServerException
 
 np.finfo(np.dtype("float32"))
 np.finfo(np.dtype("float64"))
 
 
 class CodeGenProxy:
-    def __init__(self, host: str = 'localhost', port: int = 8001, verbose: bool = False):
+    def __init__(self, host: str = 'triton', port: int = 8001, verbose: bool = False):
         self.tokenizer = Tokenizer.from_file('/python-docker/cgtok/tokenizer.json')
         self.client = client_util.InferenceServerClient(url=f'{host}:{port}', verbose=verbose)
         self.PAD_CHAR = 50256
@@ -234,7 +234,12 @@ class CodeGenProxy:
 
     def __call__(self, data: dict):
         st = time.time()
-        completion, choices = self.generate(data)
+        try:
+            completion, choices = self.generate(data)
+        except InferenceServerException as E:
+            print(E)
+            completion = {}
+            choices = []
         ed = time.time()
         print(f"Returned completion in {(ed - st) * 1000} ms")
         if data.get('stream', False):
docker-compose.yaml
@@ -28,9 +28,8 @@ services:
     build:
       context: .
       dockerfile: copilot_proxy/Dockerfile
-    command: uvicorn app:app --host 0.0.0.0 --port 5000
     env_file:
-      # You can modify this env file to configure your proxy environment
-      - example.env
+      # Automatically created via ./setup.sh
+      - .env
     ports:
-      - "5000:5000"
+      - "${API_EXTERNAL_PORT}:5000"
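The compose service now reads its configuration from the .env file that setup.sh generates (see the setup.sh diff below) rather than the checked-in example.env. As a rough sketch, the variables recorded in .env for a single-GPU run with default answers would look like this; the values are illustrative, not captured from a real run:

    # Illustrative .env contents; actual values depend on the setup.sh prompts.
    MODEL=codegen-6B-multi
    NUM_GPUS=1
    MODEL_DIR=/home/me/models/codegen-6B-multi-1gpu
    API_EXTERNAL_PORT=5000
    TRITON_HOST=triton
    TRITON_PORT=8001
    GPUS=0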
18 launch.sh
@@ -1,13 +1,14 @@
 #!/usr/bin/env bash
 
-# Read in config.env file; error if not found
-if [ ! -f config.env ]; then
-  echo "config.env not found, please run setup.sh"
-  exit 1
+# Read in .env file; error if not found
+if [ ! -f .env ]; then
+  echo ".env not found, running setup.sh"
+  bash setup.sh
 fi
-source config.env
+source .env
 
 export NUM_GPUS=${NUM_GPUS}
+export GPUS=$(seq 0 $(( NUM_GPUS - 1 )) | paste -sd ',')
 
 # if model name starts with "py-", it means we're dealing with the python backend.
 if [[ $(echo "$MODEL" | cut -c1-3) == "py-" ]]; then
@@ -16,12 +17,7 @@ else
   export MODEL_DIR="${MODEL_DIR}"/"${MODEL}-${NUM_GPUS}gpu"
 fi
 
-export GPUS=$(seq 0 $(( NUM_GPUS - 1 )) | paste -sd ',')
 export HF_CACHE_DIR=${HF_CACHE_DIR}
 
 # On newer versions, docker-compose is docker compose
-if command -v docker-compose > /dev/null; then
-  docker compose up
-else
-  docker-compose up
-fi
+docker compose up -d --remove-orphans || docker-compose up -d --remove-orphans
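The new GPUS export turns NUM_GPUS into the comma-separated device list the containers use. A quick sketch of what that pipeline produces:

    NUM_GPUS=4
    seq 0 $(( NUM_GPUS - 1 )) | paste -sd ','   # prints: 0,1,2,3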
python_backend/init_model.py
@@ -3,12 +3,13 @@ A simple script that sets up the model directory of a given model for Triton.
 """
 
 import argparse
+import os
 import shutil
 from pathlib import Path
 from string import Template
 
 SCRIPT_DIR = Path(__file__).parent
-CONFIG_TEMPLATE_PATH = SCRIPT_DIR/'config_template.pbtxt'
+CONFIG_TEMPLATE_PATH = os.path.join(SCRIPT_DIR, 'config_template.pbtxt')
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--model_dir", type=str, required=True)
@@ -21,11 +22,11 @@ args = parser.parse_args()
 
 
 # Step1: Make model directory
-model_dir_path = Path(args.model_dir)/f"py-{args.model_name}/py-model/1"
+model_dir_path = Path(os.path.join(Path(args.model_dir), f"py-{args.model_name}/py-model/1"))
 model_dir_path.mkdir(parents=True, exist_ok=True)
 
 # Step 2: copy model.py
-shutil.copy(SCRIPT_DIR/'model.py', model_dir_path/'model.py')
+shutil.copy(os.path.join(SCRIPT_DIR, 'model.py'), os.path.join(model_dir_path, 'model.py'))
 
 # Step 3: Generate config.pbtxt
 with open(CONFIG_TEMPLATE_PATH, 'r') as f:
@@ -38,5 +39,6 @@ config = template.substitute(
     use_int8=args.use_int8,
     use_auto_device_map=args.use_auto_device_map,
 )
-with open(model_dir_path/'../config.pbtxt', 'w') as f:
+with open(os.path.join(model_dir_path, '../config.pbtxt'), 'w') as f:
     f.write(config)
+print(f"Config written to")
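setup.sh drives this script from its python_backend function (see below). A representative invocation, with illustrative argument values mirroring that call, would be:

    # Arguments mirror the call in setup.sh; the values here are examples only.
    python3 ./python_backend/init_model.py --model_name codegen-2B-multi \
      --org_name Salesforce --model_dir "$(pwd)/models" --use_int8 1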
139 setup.sh
@@ -1,9 +1,15 @@
 #!/usr/bin/env bash
 
-if [ -f config.env ]; then
-  echo "config.env already exists, skipping"
-  echo "Please delete config.env if you want to re-run this script"
-  exit 1
+if [ -f .env ]; then
+  read -rp ".env already exists, do you want to delete .env and recreate it? [y/n] " DELETE
+  if [[ ${DELETE:-y} =~ ^[Yy]$ ]]
+  then
+    echo "Deleting .env"
+    rm .env
+  else
+    echo "Exiting"
+    exit 0
+  fi;
 fi
 
 function check_dep(){
@@ -17,6 +23,34 @@ check_dep curl
 check_dep zstd
 check_dep docker
 
+# Read number of GPUs
+read -rp "Enter number of GPUs [1]: " NUM_GPUS
+NUM_GPUS=${NUM_GPUS:-1}
+
+read -rp "External port for the API [5000]: " API_EXTERNAL_PORT
+API_EXTERNAL_PORT=${API_EXTERNAL_PORT:-5000}
+
+read -rp "Address for Triton [triton]: " TRITON_HOST
+TRITON_HOST=${TRITON_HOST:-triton}
+
+read -rp "Port of Triton host [8001]: " TRITON_PORT
+TRITON_PORT=${TRITON_PORT:-8001}
+
+# Read model directory
+read -rp "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
+if [ -z "$MODEL_DIR" ]; then
+  MODEL_DIR="$(pwd)/models"
+else
+  MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
+fi
+
+# Write .env
+echo "NUM_GPUS=${NUM_GPUS}" >> .env
+echo "MODEL_DIR=${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu" >> .env
+echo "API_EXTERNAL_PORT=${API_EXTERNAL_PORT}" >> .env
+echo "TRITON_HOST=${TRITON_HOST}" >> .env
+echo "TRITON_PORT=${TRITON_PORT}" >> .env
+echo "GPUS=$(seq 0 $(( NUM_GPUS - 1)) | paste -s -d ',' -)" >> .env
+
 function fastertransformer_backend(){
   echo "Models available:"
@@ -29,7 +63,7 @@ function fastertransformer_backend(){
   echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
   echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
   # Read their choice
-  read -p "Enter your choice [6]: " MODEL_NUM
+  read -rp "Enter your choice [6]: " MODEL_NUM
 
   # Convert model number to model name
   case $MODEL_NUM in
@@ -44,36 +78,29 @@ function fastertransformer_backend(){
     *) MODEL="codegen-6B-multi" ;;
   esac
 
-  # Read number of GPUs
-  read -p "Enter number of GPUs [1]: " NUM_GPUS
-  NUM_GPUS=${NUM_GPUS:-1}
-
-  # Read model directory
-  read -p "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
-  if [ -z "$MODEL_DIR" ]; then
-    MODEL_DIR="$(pwd)/models"
-  else
-    MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
-  fi
-
-  # Write config.env
-  echo "MODEL=${MODEL}" > config.env
-  echo "NUM_GPUS=${NUM_GPUS}" >> config.env
-  echo "MODEL_DIR=${MODEL_DIR}" >> config.env
-
-  if [ -d "$MODEL_DIR"/"${MODEL}"-${NUM_GPUS}gpu ]; then
+  echo "MODEL=${MODEL}" > .env
+
+  if (test -d "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ); then
+    echo "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
     echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
-    read -p "Do you want to re-use it? y/n: " REUSE_CHOICE
-    if [ "${REUSE_CHOICE^^}" = "Y" ]; then
-      exit 0
+    read -rp "Do you want to re-use it? y/n: " REUSE_CHOICE
+    if [[ ${REUSE_CHOICE:-y} =~ ^[Yy]$ ]]
+    then
+      DOWNLOAD_MODEL=n
+      echo "Re-using model"
+    else
+      DOWNLOAD_MODEL=y
+      rm -rf "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
     fi
+  else
+    DOWNLOAD_MODEL=y
   fi
 
+  if [[ ${DOWNLOAD_MODEL:-y} =~ ^[Yy]$ ]]
+  then
   # Create model directory
   mkdir -p "${MODEL_DIR}"
-
-  # For some of the models we can download it preconverted.
-  if [ $NUM_GPUS -le 2 ]; then
+  if [ "$NUM_GPUS" -le 2 ]; then
   echo "Downloading the model from HuggingFace, this will take a while..."
   SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
   DEST="${MODEL}-${NUM_GPUS}gpu"
@@ -85,9 +112,9 @@ function fastertransformer_backend(){
     rm -f "$ARCHIVE"
   else
     echo "Downloading and converting the model, this will take a while..."
-    docker run --rm -v ${MODEL_DIR}:/models -e MODEL=${MODEL} -e NUM_GPUS=${NUM_GPUS} moyix/model_converter:latest
+    docker run --rm -v "${MODEL_DIR}":/models -e MODEL=${MODEL} -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
   fi
-  echo "Done! Now run ./launch.sh to start the FauxPilot server."
+  fi
 }
 
 function python_backend(){
@@ -96,12 +123,8 @@ function python_backend(){
   echo "[2] codegen-350M-multi (1GB total VRAM required; multi-language)"
   echo "[3] codegen-2B-mono (4GB total VRAM required; Python-only)"
   echo "[4] codegen-2B-multi (4GB total VRAM required; multi-language)"
-  # echo "[5] codegen-6B-mono (13GB total VRAM required; Python-only)"
-  # echo "[6] codegen-6B-multi (13GB total VRAM required; multi-language)"
-  # echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
-  # echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
-  # Read their choice
-  read -p "Enter your choice [4]: " MODEL_NUM
+  read -rp "Enter your choice [4]: " MODEL_NUM
 
   # Convert model number to model name
   case $MODEL_NUM in
@@ -109,36 +132,22 @@ function python_backend(){
     2) MODEL="codegen-350M-multi"; ORG="Salesforce" ;;
     3) MODEL="codegen-2B-mono"; ORG="Salesforce" ;;
     4) MODEL="codegen-2B-multi"; ORG="Salesforce" ;;
+    *) MODEL="codegen-2B-multi"; ORG="Salesforce" ;;
   esac
 
-  # Read number of GPUs -- not strictly required for python backend, because of device_map="auto",
-  # but docker-compose.py uses it to select CUDA_VISIBLE_DEVICES
-  read -p "Enter number of GPUs [1]: " NUM_GPUS
-  NUM_GPUS=${NUM_GPUS:-1}
-
-  # Read model directory
-  read -p "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
-  MODEL_DIR=${MODEL_DIR:-$(pwd)/models}
-  if [ -z "$MODEL_DIR" ]; then
-    MODEL_DIR="$(pwd)/models"
-  else
-    MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
-  fi
-
   # share huggingface cache? Should be safe to share, but permission issues may arise depending upon your docker setup
-  read -p "Do you want to share your huggingface cache between host and docker container? y/n [n]: " SHARE_HF_CACHE
+  read -rp "Do you want to share your huggingface cache between host and docker container? y/n [n]: " SHARE_HF_CACHE
   SHARE_HF_CACHE=${SHARE_HF_CACHE:-n}
-  if [ "${SHARE_HF_CACHE^^}" = "Y" ]; then
-    read -p "Enter your huggingface cache directory [$HOME/.cache/huggingface]: " HF_CACHE_DIR
+  if [[ ${SHARE_HF_CACHE:-y} =~ ^[Yy]$ ]]; then
+    read -rp "Enter your huggingface cache directory [$HOME/.cache/huggingface]: " HF_CACHE_DIR
     HF_CACHE_DIR=${HF_CACHE_DIR:-$HOME/.cache/huggingface}
   else
    HF_CACHE_DIR="/tmp/hf_cache"
   fi
 
   # use int8? Allows larger models to fit in GPU but might be very marginally slower
-  read -p "Do you want to use int8? y/n [y]: " USE_INT8
-  USE_INT8=${USE_INT8:-y}
-  if [ "${USE_INT8^^}" = "N" ]; then
+  read -rp "Do you want to use int8? y/n [y]: " USE_INT8
+  if [[ ${USE_INT8:-y} =~ ^[Nn]$ ]]; then
     USE_INT8="0"
   else
     USE_INT8="1"
@@ -146,25 +155,31 @@ function python_backend(){
 
   # Write config.env
   echo "MODEL=py-${MODEL}" > config.env
-  echo "NUM_GPUS=${NUM_GPUS}" >> config.env
-  echo "MODEL_DIR=${MODEL_DIR}" >> config.env
   echo "HF_CACHE_DIR=${HF_CACHE_DIR}" >> config.env
 
   # Create model directory
   mkdir -p "${MODEL_DIR}/"
-  python3 ./python_backend/init_model.py --model_name "${MODEL}" --org_name "${ORG}" --model_dir "${MODEL_DIR}" --use_int8 "${USE_INT8}"
 
-  echo "Done! Now run ./launch.sh to start the FauxPilot server."
+  python3 ./python_backend/init_model.py --model_name "${MODEL}" --org_name "${ORG}" --model_dir "${MODEL_DIR}" --use_int8 "${USE_INT8}"
 }
 
 # choose backend
 echo "Choose your backend:"
 echo "[1] FasterTransformer backend (faster, but limited models)"
 echo "[2] Python backend (slower, but more models, and allows loading with int8)"
-read -p "Enter your choice [1]: " BACKEND_NUM
+read -rp "Enter your choice [1]: " BACKEND_NUM
 
-if [ $BACKEND_NUM -eq 2 ]; then
+if [ "$BACKEND_NUM" -eq 2 ]; then
   python_backend
 else
   fastertransformer_backend
 fi
+
+read -rp "Config complete, do you want to run FauxPilot? [y/n] " RUN
+if [[ ${RUN:-y} =~ ^[Yy]$ ]]
+then
+  bash ./launch.sh
+else
+  echo "You can run ./launch.sh to start the FauxPilot server."
+  exit 0
+fi;
6 shutdown.sh (new executable file)
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+source .env
+
+# On newer versions, docker-compose is docker compose
+docker compose down --remove-orphans || docker-compose down --remove-orphans
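With shutdown.sh added, the three scripts cover the whole server lifecycle; both launch.sh and shutdown.sh try the newer docker compose plugin first and fall back to the standalone docker-compose binary via ||. A typical sequence, run from the repo root:

    ./setup.sh      # writes .env and downloads/converts a model
    ./launch.sh     # brings the containers up via docker compose
    ./shutdown.sh   # tears them down again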