I'd been using DLPack to copy Triton tensors to Torch tensors, because it was advertised as zero-copy. It turned out that only worked on my laptop and failed on other machines; I don't know why. For now, I'm just copying the tensors as triton<->numpy<->torch. That works on the VM where the earlier code was segfaulting. Signed-off-by: Parth <thakkarparth007@gmail.com>
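The round-trip described above looks roughly like the sketch below (an illustration, not the actual patch; `pb_utils` is Triton's `triton_python_backend_utils`, and the helper names are hypothetical):

import torch
import triton_python_backend_utils as pb_utils  # available inside Triton's Python backend

def triton_to_torch(request, name):
    # Triton -> numpy -> torch: materialize the input as a numpy array,
    # then wrap it in a torch tensor (from_numpy shares the numpy buffer).
    np_arr = pb_utils.get_input_tensor_by_name(request, name).as_numpy()
    return torch.from_numpy(np_arr)

def torch_to_triton(name, tensor):
    # torch -> numpy -> Triton: pb_utils.Tensor is constructed from a numpy
    # array, so move the tensor to CPU first.
    return pb_utils.Tensor(name, tensor.cpu().numpy())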
"Tests setup script (currently for Python backend)"
|
|
|
|
import os
|
|
import subprocess
|
|
import signal
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import Dict, Union
|
|
|
|
import pexpect
|
|
import pytest
|
|
import requests
|
|
|
|
curdir = Path(__file__).parent
|
|
root = curdir.parent.parent
|
|
|
|
test_models_dir = curdir/"models"


def setup_module():
    "Setup steps for tests in this module"
    assert (root/"setup.sh").exists(), "setup.sh not found"
    if (root/".env").exists():
        shutil.move(str(root/".env"), str(root/".env.bak"))


def teardown_module():
    "Teardown steps for tests in this module"
    if (root/".env.bak").exists():
        shutil.move(str(root/".env.bak"), str(root/".env"))
    try:
        if test_models_dir.exists():
            shutil.rmtree(test_models_dir)
    except Exception as exc:
        print(
            f"WARNING: Couldn't delete `{test_models_dir}`, most likely due to permission issues. "
            f"Run the tests with sudo to ensure it gets deleted automatically, or else delete it manually. Exception: {exc}"
        )


def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5) -> str:
    "Helper function to enter input for a given prompt. Returns consumed output."
    try:
        proc.expect(expect, timeout=timeout)
    except pexpect.exceptions.TIMEOUT as exc:
        raise AssertionError(
            f"Timeout waiting for prompt: `{expect}`.\n"
            f"Output-before: `{proc.before}`\nOutput-after: `{proc.after}`"
        ) from exc

    after = str(proc.after)
    print(after)
    proc.sendline(input_s)
    return after


def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:
    "Helper function to run common setup steps."
    proc = pexpect.pty_spawn.spawn(
        "./setup.sh 2>&1", encoding="utf-8", cwd=str(root),
    )
    proc.ignorecase = True

    enter_input(proc, r".*Enter number of GPUs[^:]+: ?", str(n_gpus))
    enter_input(proc, r".*port for the API[^:]+: ?", "5000")
    enter_input(proc, r".*Address for Triton[^:]+: ?", "triton")
    enter_input(proc, r".*Port of Triton[^:]+: ?", "8001")
    enter_input(proc, r".*save your models[^\?]+\? ?", str(test_models_dir.absolute()))

    return proc


def load_test_env():
    "Load test env vars"
    # Without loading default env vars, PATH won't be set correctly
    env = os.environ.copy()
    with open(curdir/"test.env", "r", encoding="utf8") as test_env:
        for line in test_env:
            line = line.strip()
            if not line:
                continue
            # split only on the first "=" so values may themselves contain "="
            key, val = line.split("=", 1)
            env[key] = val
    return env
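
# For reference, test.env (a copy of the .env that setup.sh writes) holds one
# KEY=VALUE pair per line. The exact keys depend on setup.sh; an illustrative
# example (values are not authoritative):
#
#   NUM_GPUS=0
#   API_EXTERNAL_PORT=5000
#   TRITON_HOST=triton
#   TRITON_PORT=8001
#   MODEL_DIR=/path/to/tests/python_backend/models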


def run_inference(
    prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
    **kwargs
) -> Union[str, Dict]:
    "Invokes the copilot proxy with the given prompt and returns the completion"
    endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
    data = {
        "model": model_name,
        "prompt": prompt,
        "suffix": kwargs.get("suffix", ""),
        "max_tokens": kwargs.get("max_tokens", 16),
        "temperature": kwargs.get("temperature", 0.0),
        "top_p": kwargs.get("top_p", 1.0),
        "n": kwargs.get("n", 1),
        "stream": kwargs.get("stream", None),  # not true/false: the proxy only checks None vs. non-None :[
        "logprobs": kwargs.get("logprobs", 0),
        "stop": kwargs.get("stop", ""),
        "echo": kwargs.get("echo", True),
        "presence_penalty": kwargs.get("presence_penalty", 0.0),
        "frequency_penalty": kwargs.get("frequency_penalty", 0.0),
        "best_of": kwargs.get("best_of", 1),
        "logit_bias": kwargs.get("logit_bias", {}),
        "user": kwargs.get("user", "test"),
    }

    # generous timeout: inference on CPU can be slow
    response = requests.post(endpoint, json=data, timeout=60)
    response.raise_for_status()

    if return_all:
        return response.json()
    return response.json()["choices"][0]["text"]
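
# Example usage, assuming the FauxPilot proxy is already serving on port 5000:
#
#   text = run_inference("def add(a, b):\n", max_tokens=8)      # completion text only
#   full = run_inference("def add(a, b):\n", return_all=True)   # full JSON response dict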


@pytest.mark.parametrize("n_gpus", [0])  # we don't have a GPU on CI
def test_python_backend(n_gpus: int):
    """
    Step 1: run $root/setup.sh, passing the appropriate options via stdin
    Step 2: run docker-compose up with test.env sourced
    Step 3: call :5000 with an appropriate request
    """
    proc = run_common_setup_steps(n_gpus)

    choices = enter_input(proc, r".*Choose your backend.*Enter your choice[^:]+: ?", "2")
    assert "[2] Python backend" in choices, "Option 2 should be Python backend"

    choices = enter_input(proc, r".*Models available:.*Enter your choice[^:]+: ?", "1")
    assert "[1] codegen-350M-mono" in choices, "Option 1 should be codegen-350M-mono"

    enter_input(proc, r".*share (your )?huggingface cache[^:]+: ?", "y")
    enter_input(proc, r".*cache directory[^:]+: ?", "")  # default
    enter_input(proc, r".*use int8[^:]+: ?", "n")
    enter_input(proc, r".*run FauxPilot\? \[y/n\] ", "n")

    # copy $root/.env to $curdir/test.env
    shutil.copy(str(root/".env"), str(curdir/"test.env"))

    # run docker-compose -f docker-compose-with{out}-gpus.yaml up
    compose_file = f"docker-compose-with{'' if n_gpus > 0 else 'out'}-gpus.yaml"
    docker_proc = None
    try:
        docker_proc = pexpect.pty_spawn.spawn(
            f"docker-compose -f {compose_file} up",
            encoding="utf-8",
            cwd=str(curdir),
            env=load_test_env(),
        )

        print("Waiting for API to be ready...")
        docker_proc.expect(r".*Started GRPCInferenceService at 0.0.0.0:8001", timeout=120)

        print("API ready, sending request...")

        # Simple test 1: hello world prompt without bells and whistles
        response = run_inference("def hello_world():\n", max_tokens=16, return_all=True)
        assert response["choices"][0]["text"].rstrip() == ' print("Hello World")\n\nhello_world()\n\n#'
        assert response["choices"][0]["finish_reason"] == "length"
    finally:
        if docker_proc is not None and docker_proc.isalive():
            docker_proc.kill(signal.SIGINT)

        # Killing the docker-compose process doesn't bring the containers down,
        # so stop them explicitly:
        subprocess.run(["docker-compose", "-f", compose_file, "down"], cwd=curdir, check=True, env=load_test_env())