Mirror of https://github.com/fauxpilot/fauxpilot.git (synced 2025-07-16 10:03:25 -07:00)
Fix setup issues and add test script
Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
This commit is contained in: parent 2a91018792, commit c6be12979e
8 changed files with 262 additions and 41 deletions
launch.sh (12 changed lines)

@@ -7,17 +7,5 @@ if [ ! -f .env ]; then
 fi
 source .env
 
-export NUM_GPUS=${NUM_GPUS}
-export GPUS=$(seq 0 $(( NUM_GPUS - 1 )) | paste -sd ',')
-
-# if model name starts with "py-", it means we're dealing with the python backend.
-if [[ $(echo "$MODEL" | cut -c1-3) == "py-" ]]; then
-    export MODEL_DIR="${MODEL_DIR}"/"${MODEL}" #/py_model"
-else
-    export MODEL_DIR="${MODEL_DIR}"/"${MODEL}-${NUM_GPUS}gpu"
-fi
-
-export HF_CACHE_DIR=${HF_CACHE_DIR}
-
 # On newer versions, docker-compose is docker compose
 docker compose up -d --remove-orphans || docker-compose up -d --remove-orphans
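With the exports gone, launch.sh only sources .env and starts the containers, so everything it needs must already be in the .env written by setup.sh. As a rough sketch (the keys come from the setup.sh diff below; the values and path here are placeholders, not output of the script), a single-GPU FasterTransformer setup would leave something like:

    NUM_GPUS=1
    GPUS=0
    API_EXTERNAL_PORT=5000
    TRITON_HOST=triton
    TRITON_PORT=8001
    MODEL=codegen-6B-multi
    MODEL_DIR=/path/to/fauxpilot/models/codegen-6B-multi-1gpu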
python_backend/init_model.py
@@ -22,7 +22,7 @@ args = parser.parse_args()
 
 
 # Step1: Make model directory
-model_dir_path = Path(os.path.join(Path(args.model_dir), f"py-{args.model_name}/py-model/1"))
+model_dir_path = Path(os.path.join(Path(args.model_dir), f"py-{args.org_name}-{args.model_name}/py-model/1"))
 model_dir_path.mkdir(parents=True, exist_ok=True)
 
 # Step 2: copy model.py

@@ -41,4 +41,4 @@ config = template.substitute(
 )
 with open(os.path.join(model_dir_path, '../config.pbtxt'), 'w') as f:
     f.write(config)
-    print(f"Config written to")
+    print(f"Config written to {os.path.abspath(f.name)}")
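Because setup.sh now passes the organization name as well, the generated Triton model repository is keyed by both org and model. A hedged sketch of the call and the layout it should produce (argument values are illustrative defaults, not taken from this diff):

    python3 ./python_backend/init_model.py \
        --model_name codegen-350M-mono \
        --org_name Salesforce \
        --model_dir ./models \
        --use_int8 0
    # Roughly expected layout:
    #   models/py-Salesforce-codegen-350M-mono/py-model/1/model.py      <- copied in "Step 2"
    #   models/py-Salesforce-codegen-350M-mono/py-model/config.pbtxt    <- written from the template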
python_backend/model.py
@@ -25,15 +25,16 @@ class TritonPythonModel:
         def get_bool(x):
             return model_config["parameters"][x]["string_value"].lower() in ["1", "true"]
 
-        is_half = get_bool("use_half")
+        is_half = get_bool("use_half") and torch.cuda.is_available()
         # This will make inference marginally slower, but will allow bigger models to fit in GPU
-        int8 = get_bool("use_int8")
-        auto_device_map = get_bool("use_auto_device_map")
+        int8 = get_bool("use_int8") and torch.cuda.is_available()
+        auto_device_map = get_bool("use_auto_device_map") and torch.cuda.is_available()
 
+        print("Cuda available?", torch.cuda.is_available())
         print(f"is_half: {is_half}, int8: {int8}, auto_device_map: {auto_device_map}")
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            torch_dtype=torch.float16 if is_half else "auto",
+            torch_dtype=torch.float16 if is_half else ("auto" if torch.cuda.is_available() else torch.float32),
             load_in_8bit=int8,
             device_map="auto" if auto_device_map else None,
             low_cpu_mem_usage=True,
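The added torch.cuda.is_available() guards let the Python backend come up on a CPU-only host: half precision, int8, and the automatic device map are all disabled and the weights load as float32. One hedged way to confirm which path a running stack took (the triton service name comes from the compose files further down; this assumes torch is importable inside the Triton image, which this backend itself requires):

    docker-compose exec triton python3 -c "import torch; print(torch.cuda.is_available())"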
setup.sh (49 changed lines)

@@ -23,6 +23,8 @@ check_dep curl
 check_dep zstd
 check_dep docker
 
+############### Common configuration ###############
+
 # Read number of GPUs
 read -rp "Enter number of GPUs [1]: " NUM_GPUS
 NUM_GPUS=${NUM_GPUS:-1}

@@ -36,21 +38,23 @@ TRITON_HOST=${TRITON_HOST:-triton}
 read -rp "Port of Triton host [8001]: " TRITON_PORT
 TRITON_PORT=${TRITON_PORT:-8001}
 
-# Read model directory
-read -rp "Where do you want to save the model [$(pwd)/models]? " MODEL_DIR
-if [ -z "$MODEL_DIR" ]; then
-    MODEL_DIR="$(pwd)/models"
+# Read models root directory (all models go under this)
+read -rp "Where do you want to save your models [$(pwd)/models]? " MODELS_ROOT_DIR
+if [ -z "$MODELS_ROOT_DIR" ]; then
+    MODELS_ROOT_DIR="$(pwd)/models"
 else
-    MODEL_DIR="$(readlink -m "${MODEL_DIR}")"
+    MODELS_ROOT_DIR="$(readlink -m "${MODELS_ROOT_DIR}")"
 fi
+mkdir -p "$MODELS_ROOT_DIR"
 
 # Write .env
 echo "NUM_GPUS=${NUM_GPUS}" >> .env
-echo "MODEL_DIR=${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu" >> .env
+echo "GPUS=$(seq 0 $(( NUM_GPUS - 1)) | paste -s -d ',' -)" >> .env
 echo "API_EXTERNAL_PORT=${API_EXTERNAL_PORT}" >> .env
 echo "TRITON_HOST=${TRITON_HOST}" >> .env
 echo "TRITON_PORT=${TRITON_PORT}" >> .env
-echo "GPUS=$(seq 0 $(( NUM_GPUS - 1)) | paste -s -d ',' -)" >> .env
+
+############### Backend specific configuration ###############
 
 function fastertransformer_backend(){
     echo "Models available:"

@@ -78,10 +82,11 @@ function fastertransformer_backend(){
         *) MODEL="codegen-6B-multi" ;;
     esac
 
-    echo "MODEL=${MODEL}" > .env
+    echo "MODEL=${MODEL}" >> .env
+    echo "MODEL_DIR=${MODELS_ROOT_DIR}/${MODEL}-${NUM_GPUS}gpu" >> .env
 
-    if (test -d "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ); then
-        echo "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
+    if (test -d "$MODELS_ROOT_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu ); then
+        echo "$MODELS_ROOT_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
         echo "Converted model for ${MODEL}-${NUM_GPUS}gpu already exists."
         read -rp "Do you want to re-use it? y/n: " REUSE_CHOICE
         if [[ ${REUSE_CHOICE:-y} =~ ^[Yy]$ ]]

@@ -90,7 +95,7 @@ function fastertransformer_backend(){
            echo "Re-using model"
        else
            DOWNLOAD_MODEL=y
-            rm -rf "$MODEL_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
+            rm -rf "$MODELS_ROOT_DIR"/"${MODEL}"-"${NUM_GPUS}"gpu
        fi
    else
        DOWNLOAD_MODEL=y

@@ -98,21 +103,19 @@ function fastertransformer_backend(){
 
    if [[ ${DOWNLOAD_MODEL:-y} =~ ^[Yy]$ ]]
    then
-        # Create model directory
-        mkdir -p "${MODEL_DIR}"
        if [ "$NUM_GPUS" -le 2 ]; then
            echo "Downloading the model from HuggingFace, this will take a while..."
            SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
            DEST="${MODEL}-${NUM_GPUS}gpu"
-            ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
-            cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
+            ARCHIVE="${MODELS_ROOT_DIR}/${DEST}.tar.zst"
+            cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODELS_ROOT_DIR}"
            curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
                -o "$ARCHIVE"
-            zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
+            zstd -dc "$ARCHIVE" | tar -xf - -C "${MODELS_ROOT_DIR}"
            rm -f "$ARCHIVE"
        else
            echo "Downloading and converting the model, this will take a while..."
-            docker run --rm -v "${MODEL_DIR}":/models -e MODEL=${MODEL} -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
+            docker run --rm -v "${MODELS_ROOT_DIR}":/models -e MODEL=${MODEL} -e NUM_GPUS="${NUM_GPUS}" moyix/model_converter:latest
        fi
    fi
 }

@@ -154,15 +157,15 @@ function python_backend(){
     fi
 
     # Write config.env
-    echo "MODEL=py-${MODEL}" > config.env
-    echo "HF_CACHE_DIR=${HF_CACHE_DIR}" >> config.env
+    echo "MODEL=py-${MODEL}" >> .env
+    echo "MODEL_DIR=${MODELS_ROOT_DIR}/py-${ORG}-${MODEL}" >> .env # different format from fastertransformer backend
+    echo "HF_CACHE_DIR=${HF_CACHE_DIR}" >> .env
 
-    # Create model directory
-    mkdir -p "${MODEL_DIR}/"
-
-    python3 ./python_backend/init_model.py --model_name "${MODEL}" --org_name "${ORG}" --model_dir "${MODEL_DIR}" --use_int8 "${USE_INT8}"
+    python3 ./python_backend/init_model.py --model_name "${MODEL}" --org_name "${ORG}" --model_dir "${MODELS_ROOT_DIR}" --use_int8 "${USE_INT8}"
 }
 
+common_config
+
 # choose backend
 echo "Choose your backend:"
 echo "[1] FasterTransformer backend (faster, but limited models)"
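Both backends now append their settings to the same .env instead of the Python backend keeping a separate config.env, which is what lets the simplified launch.sh above work unchanged for either backend. A hedged end-to-end sketch for the Python backend (model and org shown as the codegen-350M-mono / Salesforce defaults; your values will differ):

    ./setup.sh     # answer the prompts: backend [2], model [1] codegen-350M-mono, etc.
    cat .env       # expect lines like MODEL=py-codegen-350M-mono,
                   # MODEL_DIR=<models root>/py-Salesforce-codegen-350M-mono, HF_CACHE_DIR=...
    ./launch.sh    # starts triton and copilot_proxy via docker compose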
tests/python_backend/.gitignore (new file, 2 lines)

test.env
models/*
tests/python_backend/docker-compose-with-gpus.yaml (new file, 35 lines)

version: '3.3'
services:
  triton:
    build:
      context: ../../
      dockerfile: Dockerfile
    command: bash -c "CUDA_VISIBLE_DEVICES="${GPUS}" mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model"
    shm_size: '2gb'
    volumes:
      - ${MODEL_DIR}:/model
      - ${HF_CACHE_DIR}:/root/.cache/huggingface
    ports:
      - "8000:8000"
      - "8001:8001"
      - "8002:8002"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
  copilot_proxy:
    # For dockerhub version
    # image: moyix/copilot_proxy:latest
    # command: python3 -m flask run --host=0.0.0.0 --port=5000
    # For local build
    build:
      context: ../../
      dockerfile: copilot_proxy/Dockerfile
    env_file:
      # Automatically created via ./setup.sh
      - test.env
    ports:
      - "${API_EXTERNAL_PORT}:5000"
tests/python_backend/docker-compose-without-gpus.yaml (new file, 28 lines)

version: '3.3'
services:
  triton:
    build:
      context: ../../
      dockerfile: Dockerfile
    command: bash -c "CUDA_VISIBLE_DEVICES="${GPUS}" mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model"
    shm_size: '2gb'
    volumes:
      - ${MODEL_DIR}:/model
      - ${HF_CACHE_DIR}:/root/.cache/huggingface
    ports:
      - "8000:8000"
      - "8001:8001"
      - "8002:8002"
  copilot_proxy:
    # For dockerhub version
    # image: moyix/copilot_proxy:latest
    # command: python3 -m flask run --host=0.0.0.0 --port=5000
    # For local build
    build:
      context: ../../
      dockerfile: copilot_proxy/Dockerfile
    env_file:
      # Automatically created via ./setup.sh
      - test.env
    ports:
      - "${API_EXTERNAL_PORT}:5000"
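Both compose files interpolate MODEL_DIR, HF_CACHE_DIR, GPUS, and API_EXTERNAL_PORT from the caller's environment, and additionally hand test.env to the proxy container via env_file. The test script exports these by parsing test.env itself; a hedged way to do the same from a shell is:

    set -a && source test.env && set +a     # export every variable the compose files interpolate
    docker-compose -f docker-compose-without-gpus.yaml up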
tests/python_backend/test_setup.py (new file, 164 lines)

"Tests setup script (currently for Python backend)"

import os
import subprocess
import signal
import shutil
from pathlib import Path
from typing import Dict, Union

import pexpect
import pytest
import requests

curdir = Path(__file__).parent
root = curdir.parent.parent

test_models_dir = curdir/"models"


def setup_module():
    "Setup steps for tests in this module"
    assert (root/"setup.sh").exists(), "setup.sh not found"
    if (root/".env").exists():
        shutil.move(str(root/".env"), str(root/".env.bak"))

def teardown_module():
    "Teardown steps for tests in this module"
    if (root/".env.bak").exists():
        shutil.move(str(root/".env.bak"), str(root/".env"))
    try:
        if test_models_dir.exists():
            shutil.rmtree(test_models_dir)
    except Exception as exc:
        print(
            f"WARNING: Couldn't delete `{test_models_dir}` most likely due to permission issues."
            f"Run the tests with sudo to ensure this gets deleted automatically, or else delete manually. Exception: {exc}"
        )

def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5) -> str:
    "Helper function to enter input for a given prompt. Returns consumed output."

    try:
        proc.expect(expect, timeout=timeout)
    except pexpect.exceptions.TIMEOUT as exc:
        raise AssertionError(
            f"Timeout waiting for prompt: `{expect}`.\n"
            f"Output-before: `{proc.before}`\nOutput-after: `{proc.after}`"
        ) from exc

    after = str(proc.after)
    print(after)
    proc.sendline(input_s)
    return after

def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:
    "Helper function to run common setup steps."
    proc = pexpect.pty_spawn.spawn(
        "./setup.sh 2>&1", encoding="utf-8", cwd=str(root),
    )
    proc.ignorecase = True

    enter_input(proc, r".*Enter number of GPUs[^:]+: ?", str(n_gpus))
    enter_input(proc, r".*port for the API[^:]+: ?", "5000")
    enter_input(proc, r".*Address for Triton[^:]+: ?", "triton")
    enter_input(proc, r".*Port of Triton[^:]+: ?", "8001")
    enter_input(proc, r".*save your models[^\?]+\? ?", str(test_models_dir.absolute()))

    return proc

def load_test_env():
    "Load test env vars"
    # Without loading default env vars, PATH won't be set correctly
    env = os.environ.copy()
    with open(curdir/"test.env", "r", encoding="utf8") as test_env:
        for line in test_env:
            key, val = line.strip().split("=")
            env[key] = val
    return env

def run_inference(
    prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
    **kwargs
) -> Union[str, Dict]:
    "Invokes the copilot proxy with the given prompt and returns the completion"
    endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
    data = {
        "model": model_name,
        "prompt": prompt,
        "suffix": kwargs.get("suffix", ""),
        "max_tokens": kwargs.get("max_tokens", 16),
        "temperature": kwargs.get("temperature", 0.0),
        "top_p": kwargs.get("top_p", 1.0),
        "n": kwargs.get("n", 1),
        "stream": kwargs.get("stream", None),  # it's not true/false. It's None or not None :[
        "logprobs": kwargs.get("logprobs", 0),
        "stop": kwargs.get("stop", ""),
        "echo": kwargs.get("echo", True),
        "presence_penalty": kwargs.get("presence_penalty", 0.0),
        "frequency_penalty": kwargs.get("frequency_penalty", 0.0),
        "best_of": kwargs.get("best_of", 1),
        "logit_bias": kwargs.get("logit_bias", {}),
        "user": kwargs.get("user", "test"),
    }

    response = requests.post(endpoint, json=data)
    response.raise_for_status()

    if return_all:
        return response.json()
    return response.json()["choices"][0]["text"]


@pytest.mark.parametrize("n_gpus", [0])  # we don't have a GPU on CI
def test_python_backend(n_gpus: int):
    """
    Step 1: run $root/setup.sh while passing appropriate options via stdin
    Step 2: run docker-compose up with test.env sourced
    Step 3: call :5000 with appropriate request
    """
    proc = run_common_setup_steps(n_gpus)

    choices = enter_input(proc, r".*Choose your backend.*Enter your choice[^:]+: ?", "2")
    assert "[2] Python backend" in choices, "Option 2 should be Python backend"

    choices = enter_input(proc, r".*Models available:.*Enter your choice[^:]+: ?", "1")
    assert "[1] codegen-350M-mono" in choices, "Option 1 should be codegen-350M-mono"

    enter_input(proc, r".*share (your )?huggingface cache[^:]+: ?", "y")
    enter_input(proc, r".*cache directory[^:]+: ?", "")  # default
    enter_input(proc, r".*use int8[^:]+: ?", "n")
    enter_input(proc, r".*run FauxPilot\? \[y/n\] ", "n")

    # copy $root/.env to $curdir/test.env
    shutil.copy(str(root/".env"), str(curdir/"test.env"))

    # run docker-compose up -f docker-compose-{without|with}-gpus.yml
    compose_file = f"docker-compose-with{'' if n_gpus > 0 else 'out'}-gpus.yaml"
    docker_proc = None
    try:
        docker_proc = pexpect.pty_spawn.spawn(
            f"docker-compose -f {compose_file} up",
            encoding="utf-8",
            cwd=curdir,
            env=load_test_env(),
        )

        print("Waiting for API to be ready...")
        docker_proc.expect(r".*Started GRPCInferenceService at 0.0.0.0:8001", timeout=120)

        print("API ready, sending request...")

        # Simple test 1: hello world prompt without bells and whistles
        response = run_inference("def hello_world():\n", max_tokens=16, return_all=True)
        assert response["choices"][0]["text"].rstrip() == ' print("Hello World")\n\nhello_world()\n\n#'
        assert response["choices"][0]["finish_reason"] == "length"

    finally:
        if docker_proc is not None and docker_proc.isalive():
            docker_proc.kill(signal.SIGINT)

        # killing docker-compose process doesn't bring down the containers.
        # explicitly stop the containers:
        subprocess.run(["docker-compose", "-f", compose_file, "down"], cwd=curdir, check=True)
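The test drives setup.sh through a pseudo-terminal, brings the stack up with docker-compose, and then checks a real completion against the proxy, so it needs Docker running plus the Python packages imported above. A typical invocation would look roughly like:

    pip install pytest pexpect requests
    pytest tests/python_backend/test_setup.py -s    # -s keeps the setup prompts and compose logs visible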