mirror of https://github.com/fauxpilot/fauxpilot.git (synced 2025-07-16 10:03:25 -07:00)
Dev (#148)

* Create publish-docker-images.yaml
* Add copilot_proxy publishing
* Add model_converter publishing
* Use dockerhub version
* Do not login for PRs
* Overwrite some of the label values
* Move ignore files to the root of `context`
* Add comments & fix some issues
* Fix typos
* Remove the target of the master branch
* Delete .dockerignore
* Delete .dockerignore
* Add Flake8
* Add Flake8 and format code accordingly
* Iterate on the PR template, fix the token for the contributor action
* Remove converter image build
* Update Dockerfile of proxy
* Comment out proxy image in compose

Co-authored-by: Fred de Gier <freddegier@me.com>

* Fix build action

---------

Co-authored-by: Rowe Wilson Frederisk Holme <frederisk@outlook.com>
parent 1bb0e53117
commit 283668448d

13 changed files with 198 additions and 67 deletions
.github/PULL_REQUEST_TEMPLATE.md (vendored, 27 changes)
````diff
@@ -1,18 +1,12 @@
 ---
-# [Template] PR Description
 
-In general, the github system duplicates your commit message automatically for your convenience.
-After composing your own PR description using this template, please remove any unneeded portions.
-```bash
 ## 1. General Description
-The commit title must begin with one of the eleven given options.
-Build, chore, CI, documentation, task, fix, performance, refactor, revert, style, and test are some examples.
-For more details, please see [HERE](https://www.conventionalcommits.org/en/v1.0.0/).
-Summarize changes in no more than 50 characters ASAP for readability and maintenance.
+<!-- Describe what this PR intends to do -->
 
 ## 2. Changes proposed in this PR:
-- Bulleted lists are also acceptable.
-- Typically, a hyphen or asterisk before the bullet, followed by a single space.
+<!-- Bulleted lists are also acceptable. Typically, a hyphen or asterisk before the bullet, followed by a single space.-->
+1.
+1.
 
 Resolves: #{GitHub-Issue-Number}
 See also: #{GitHub-Issue-Number}
@@ -20,8 +14,9 @@ See also: #{GitHub-Issue-Number}
 
 ## 3. How to evaluate:
 1. Describe how to evaluate such that it may be reproduced by the reviewer (s).
-2. Self assessment:**
-   - Build test: [ ]Passed [ ]Failed [*]Skipped
-   - Run test: [ ]Passed [ ]Failed [* ]Skipped
-```
-
+1.
+1.
+1.
+1. Self assessment:
+   - [ ] Successfully build locally `docker-compose build`:
+   - [ ] Successfully tested the full solution locally
````
.github/workflows/lint.yml (vendored, new file, 17 lines)
```diff
@@ -0,0 +1,17 @@
+name: flake8 Lint
+
+on: [push, pull_request]
+
+jobs:
+  flake8-lint:
+    runs-on: ubuntu-latest
+    name: Lint
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v3
+      - name: Set up Python environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
```
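The new lint job runs flake8 over the repository on every push and pull request. As a hedged illustration (not part of the commit), this is the kind of finding flake8 reports; the rule shown, F401, is the same one a converter hunk further down silences with a `# noqa: F401` marker:

```python
# Toy file for illustration only. Running `flake8` on it reports roughly:
#   toy.py:5:1: F401 'json' imported but unused
import sys

import json  # flagged by F401: imported but never referenced

print(sys.version)  # sys is used, so it is not flagged
```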
.github/workflows/publish-docker-images.yaml (vendored, new file, 94 lines)
```diff
@@ -0,0 +1,94 @@
+name: Publish Docker Images
+
+on:
+  # `push` will build and publish the image with a tag corresponding to the branch name (such as `main`, `master`).
+  # `push tag` will build and publish the image with a tag that has the same name as the git tag (such as `v1.2.3`, `v0.0.1-alpha.1`) and a tag named `latest`.
+  push:
+    branches: [main]
+  # `pull request` will only build to test the image, not publish.
+  pull_request:
+    # The branches below must be a subset of the branches above.
+    branches: [main]
+  # `release publish` is almost the same as `push tag`.
+  release:
+    types: [published]
+
+jobs:
+  publish_copilot_proxy:
+    name: Publish copilot_proxy
+    # The `-latest` runner images are the latest **stable** images that GitHub provides.
+    # Here it is equivalent to `ubuntu-20.04`.
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Extract Docker metadata
+        uses: docker/metadata-action@v4
+        with:
+          # Define image name
+          images: moyix/copilot_proxy
+          # Overwrite labels
+          labels: |
+            org.opencontainers.image.title=CopilotProxy
+            org.opencontainers.image.description=A simple proxy that enables triton to send back copilot compatible communication content.
+            org.opencontainers.image.url=https://github.com/moyix/fauxpilot/tree/main/copilot_proxy
+            org.opencontainers.image.source=https://github.com/moyix/fauxpilot/tree/main/copilot_proxy
+
+      - name: Login to Docker Hub
+        # Do not login for PRs
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push images
+        uses: docker/build-push-action@v3
+        with:
+          # For PRs, just build and not push.
+          push: ${{ github.event_name != 'pull_request' }}
+          context: .
+          file: proxy.Dockerfile
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+  # publish_model_converter:
+  #   name: Publish model_converter
+  #   # The `-latest` runner images are the latest **stable** images that GitHub provides.
+  #   # Here it is equivalent to `ubuntu-20.04`.
+  #   runs-on: ubuntu-latest
+
+  #   steps:
+  #     - name: Checkout repository
+  #       uses: actions/checkout@v3
+
+  #     - name: Extract Docker metadata
+  #       uses: docker/metadata-action@v4
+  #       with:
+  #         # Define image name
+  #         images: moyix/model_converter
+  #         # Overwrite labels
+  #         labels: |
+  #           org.opencontainers.image.title=ModelConverter
+  #           org.opencontainers.image.description=Convert the model to GPT-J to adapt the FasterTransformer Backend.
+  #           org.opencontainers.image.url=https://github.com/moyix/fauxpilot/tree/main/converter
+  #           org.opencontainers.image.source=https://github.com/moyix/fauxpilot/tree/main/converter
+
+  #     - name: Login to Docker Hub
+  #       # Do not login for PRs.
+  #       if: github.event_name != 'pull_request'
+  #       uses: docker/login-action@v2
+  #       with:
+  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
+  #         password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+  #     - name: Build and push images
+  #       uses: docker/build-push-action@v3
+  #       with:
+  #         # For PRs, just build and not push.
+  #         push: ${{ github.event_name != 'pull_request' }}
+  #         context: ./converter
+  #         tags: ${{ steps.meta.outputs.tags }}
+  #         labels: ${{ steps.meta.outputs.labels }}
```
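The comments in the workflow describe the tagging policy: a branch push publishes an image tagged with the branch name, a tag push or release publishes the version tag plus `latest`, and a pull request only builds. A minimal Python sketch of that policy (an illustration of the described behavior, not the actual logic inside `docker/metadata-action`):

```python
# Sketch of the tag policy described in the workflow comments above.
def derive_tags(event: str, ref: str) -> list[str]:
    image = "moyix/copilot_proxy"
    if event == "pull_request":
        return []  # PRs only build; nothing is pushed
    if ref.startswith("refs/tags/"):
        version = ref.removeprefix("refs/tags/")  # e.g. v1.2.3
        return [f"{image}:{version}", f"{image}:latest"]
    branch = ref.removeprefix("refs/heads/")  # e.g. main
    return [f"{image}:{branch}"]


assert derive_tags("push", "refs/heads/main") == ["moyix/copilot_proxy:main"]
assert derive_tags("push", "refs/tags/v1.2.3") == [
    "moyix/copilot_proxy:v1.2.3", "moyix/copilot_proxy:latest"]
assert derive_tags("pull_request", "refs/heads/feature") == []
```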
(contributor-greeting workflow; file name not shown in the mirror)

```diff
@@ -13,7 +13,7 @@ jobs:
       - name: 'Greet the contributor'
         uses: garg3133/welcome-new-contributors@v1.2
         with:
-          token: ${{ secrets.BOT_ACCESS_TOKEN }}
+          token: ${{ secrets.GITHUB_TOKEN }}
           is-oauth-token: true
           issue-message: 'Hello there, thanks for opening your first issue. We
             welcome you to the FauxPilot community!'
```
(CodeGen-to-GPT-J converter script; file name not shown in the mirror)

```diff
@@ -4,14 +4,14 @@ import argparse
 import torch
 from transformers import GPTJForCausalLM, GPTJConfig
 # Note: these need the git version of Transformers as of 7/22/2022
-from transformers import CodeGenTokenizer, CodeGenForCausalLM
+from transformers import CodeGenTokenizer, CodeGenForCausalLM  # noqa: F401
 from transformers import CODEGEN_PRETRAINED_MODEL_ARCHIVE_LIST
 
 parser = argparse.ArgumentParser('Convert SalesForce CodeGen model to GPT-J')
 parser.add_argument('--code_model',
                     choices=CODEGEN_PRETRAINED_MODEL_ARCHIVE_LIST, default='Salesforce/codegen-350M-multi',
                     help='which SalesForce model to convert'
                     )
 parser.add_argument('output_dir', help='where to store the converted model')
 args = parser.parse_args()
 
@@ -47,13 +47,16 @@ config.tokenizer_class = 'CodeGenTokenizer'
 gptj_model = GPTJForCausalLM(config).half()
 embed_dim = config.n_embd
 
-def replace(model, weights, name):
-    model.state_dict()[name].copy_(weights.detach())
+
+def replace(model, weights, model_name):
+    model.state_dict()[model_name].copy_(weights.detach())
+
 
 def replace_by_name(dest_model, src_model, old_name, new_name):
     assert old_name in src_model.state_dict()
     assert new_name in dest_model.state_dict()
-    replace(dest_model, src_model.state_dict()[old_name], new_name)
+    replace(model=dest_model, weights=src_model.state_dict()[old_name], model_name=new_name)
 
 
 print('Converting...')
 # Copy weights from CodeGen model
@@ -72,9 +75,9 @@ with torch.no_grad():
         # After a great deal of pain, I figured out that this permutation on
         # the weights of the qkv_proj fixes it.
         base_permutation = [0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11]
-        permutation = torch.cat([torch.arange(i*local_dim, (i+1)*local_dim) for i in base_permutation])
+        permutation = torch.cat([torch.arange(i * local_dim, (i + 1) * local_dim) for i in base_permutation])
         # NB: we permute the *rows* here because the computation is xA.T
-        new_qkv_proj = qkv_proj[permutation,:]
+        new_qkv_proj = qkv_proj[permutation, :]
         # NB: the name QKV is misleading here; they are actually stored in
         # the order QVK
         query, value, key = torch.split(new_qkv_proj, embed_dim, dim=0)
@@ -82,7 +85,7 @@ with torch.no_grad():
             replace(gptj_model, key, name.replace('qkv_proj', 'k_proj'))
             replace(gptj_model, value, name.replace('qkv_proj', 'v_proj'))
         else:
-            replace_by_name(gptj_model, cg_model, name, name)
+            replace_by_name(dest_model=gptj_model, src_model=cg_model, old_name=name, new_name=name)
 
 print('Conversion complete.')
 print(f"Saving model to {args.output_dir}...")
```
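The subtlest piece of this script is the qkv_proj permutation, which the reformatting above touches but does not change. A shrunken, self-contained sketch of what it does (toy sizes; the real script derives `local_dim` from the model config):

```python
import torch

# CodeGen stores the fused qkv_proj weight as interleaved chunks; the base
# permutation regroups the 12 chunks so the Q, V and K blocks each become
# contiguous and can then be separated with torch.split.
embed_dim = 12                    # toy value standing in for config.n_embd
local_dim = 3 * embed_dim // 12   # chunk size: qkv_proj has 3*embed_dim rows

base_permutation = [0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11]
permutation = torch.cat([torch.arange(i * local_dim, (i + 1) * local_dim)
                         for i in base_permutation])

qkv_proj = torch.arange(3 * embed_dim).reshape(-1, 1).float()  # fake weights
new_qkv_proj = qkv_proj[permutation, :]  # permute *rows*, since y = x @ A.T
# Per the comment in the diff, the regrouped blocks come out in Q, V, K order.
query, value, key = torch.split(new_qkv_proj, embed_dim, dim=0)
print(query.shape, value.shape, key.shape)  # each torch.Size([12, 1])
```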
(GPT-J checkpoint to FasterTransformer converter script; file name not shown in the mirror)

```diff
@@ -23,10 +23,12 @@ import torch
 import os
 import sys
 from transformers import GPTJForCausalLM
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(dir_path + "/../../../..")
 sys.path.append(dir_path)
 
+
+
 def get_weight_data_type(data_type):
     if data_type == "fp32":
         return np.float32
@@ -35,8 +37,8 @@ def get_weight_data_type(data_type):
     else:
         assert False, f"Invalid weight data type {data_type}"
 
-def split_and_convert_process(i, saved_dir,factor,key,args, val):
+
+def split_and_convert_process(i, saved_dir, factor, key, val):
     if key.find("input_layernorm.weight") != -1 or key.find("input_layernorm.bias") != -1 or \
             key.find("attention.dense.bias") != -1 or key.find("post_attention_layernorm.weight") != -1 or \
             key.find("post_attention_layernorm.bias") != -1 or key.find("mlp.dense_4h_to_h.bias") != -1 or \
@@ -70,16 +72,16 @@ def split_and_convert_process(i, saved_dir,factor,key,args, val):
     else:
         print("[ERROR] cannot find key '{}'".format(key))
 
 
 def split_and_convert(args):
     saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num
 
-    if(os.path.exists(saved_dir) == False):
+    if os.path.exists(saved_dir) is False:
         os.makedirs(saved_dir)
-    ckpt_name = args.in_file
 
     t_gpu_num = args.trained_gpu_num
     i_gpu_num = args.infer_gpu_num
-    assert(i_gpu_num % t_gpu_num == 0)
+    assert (i_gpu_num % t_gpu_num == 0)
 
     factor = (int)(i_gpu_num / t_gpu_num)
 
@@ -93,10 +95,10 @@ def split_and_convert(args):
         for k, v in vars(model.config).items():
             config["gpt"][k] = f"{v}"
         config["gpt"]["weight_data_type"] = args.weight_data_type
-        with open((Path(saved_dir) / f"config.ini").as_posix(), 'w') as configfile:
+        with open((Path(saved_dir) / "config.ini").as_posix(), 'w') as configfile:
             config.write(configfile)
-    except:
-        print(f"Fail to save the config in config.ini.")
+    except Exception:
+        print("Fail to save the config in config.ini.")
     np_weight_data_type = get_weight_data_type(args.weight_data_type)
 
     huggingface_model_name_pattern = [
@@ -130,9 +132,11 @@ def split_and_convert(args):
         if name == 'transformer.wte.weight':
             param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.wte.bin")
         elif name == 'transformer.ln_f.bias':
-            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.final_layernorm.bias.bin")
+            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
+                saved_dir + "model.final_layernorm.bias.bin")
         elif name == 'transformer.ln_f.weight':
-            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.final_layernorm.weight.bin")
+            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
+                saved_dir + "model.final_layernorm.weight.bin")
         elif name == 'lm_head.weight':
             param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.lm_head.weight.bin")
         elif name == 'lm_head.bias':
@@ -156,12 +160,12 @@ def split_and_convert(args):
             weights = param.detach().cpu().numpy().astype(np_weight_data_type)
 
             # Some weights need to be transposed
-            if name.find("mlp.fc_in.weight") != -1 or \
-                    name.find("mlp.fc_out.weight") != -1 or \
+            if name.find("mlp.fc_in.weight") != -1 or name.find("mlp.fc_out.weight") != -1 or \
                     name.find("attn.out_proj.weight") != -1:
                 weights = weights.T
 
-            new_name = name.replace("transformer.h.", "layers.").replace(huggingface_model_name_pattern[i], ft_model_name_pattern[i])
+            new_name = name.replace("transformer.h.", "layers.").replace(huggingface_model_name_pattern[i],
+                                                                         ft_model_name_pattern[i])
 
             pool.starmap(split_and_convert_process,
                          [(0, saved_dir, factor, new_name, args,
@@ -170,14 +174,17 @@ def split_and_convert(args):
     pool.close()
     pool.join()
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
     parser.add_argument('-saved_dir', '-o', type=str, help='file name of output file', required=True)
     parser.add_argument('-in_file', '-i', type=str, help='HF model name or directory', required=True)
     parser.add_argument('-trained_gpu_num', '-t_g', type=int, help='How many gpus for training', default=1)
    parser.add_argument('-infer_gpu_num', '-i_g', type=int, help='How many gpus for inference', required=True)
-    parser.add_argument("-processes", "-p", type=int, help="How many processes to spawn for conversion (default: 4)", default=4)
-    parser.add_argument("-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"], help="output weight data type")
+    parser.add_argument("-processes", "-p", type=int, help="How many processes to spawn for conversion (default: 4)",
+                        default=4)
+    parser.add_argument("-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"],
+                        help="output weight data type")
 
     args = parser.parse_args()
     print("\n=============== Argument ===============")
```
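One behavioral note on the `except:` to `except Exception:` change above: a bare `except` also catches `SystemExit` and `KeyboardInterrupt`, so a Ctrl-C during the config write would have been misreported as a failed save. A small self-contained sketch of the narrowed handler:

```python
# The narrowed handler still catches ordinary I/O failures...
try:
    open("/nonexistent/config.ini", "w")
except Exception:
    print("Fail to save the config in config.ini.")  # reached: OSError

# ...but it no longer swallows interpreter-control exceptions, which do not
# inherit from Exception:
print(issubclass(KeyboardInterrupt, Exception))  # False
print(issubclass(OSError, Exception))            # True
```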
(Triton config generator script; file name not shown in the mirror)

```diff
@@ -6,10 +6,12 @@ from string import Template
 from transformers import GPTJConfig, AutoTokenizer
 import torch
 
+
 def round_up(x, multiple):
     remainder = x % multiple
     return x if remainder == 0 else x + multiple - remainder
 
+
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 CONFIG_TEMPLATE_PATH = os.path.join(SCRIPT_DIR, 'config_template.pbtxt')
 
```
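`round_up` itself is unchanged here (only blank lines were added for flake8), but its behavior is worth a worked example with illustrative values:

```python
def round_up(x, multiple):
    remainder = x % multiple
    return x if remainder == 0 else x + multiple - remainder


# 50257 % 1024 == 81, so the value is padded to the next multiple: 50 * 1024.
assert round_up(50257, 1024) == 51200
# Already aligned values pass through untouched.
assert round_up(2048, 1024) == 2048
```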
(copilot_proxy FastAPI application; file name not shown in the mirror)

```diff
@@ -27,6 +27,7 @@ app = FastAPI(
     swagger_ui_parameters={"defaultModelsExpandDepth": -1}
 )
 
+
 @app.exception_handler(FauxPilotException)
 async def fauxpilot_handler(request: Request, exc: FauxPilotException):
     return JSONResponse(
@@ -34,6 +35,7 @@ async def fauxpilot_handler(request: Request, exc: FauxPilotException):
         content=exc.json()
     )
 
+
 # Used to support copilot.vim
 @app.get("/copilot_internal/v2/token")
 def get_copilot_token():
@@ -43,6 +45,7 @@ def get_copilot_token():
         content=content
     )
 
+
 @app.post("/v1/engines/codegen/completions")
 # Used to support copilot.vim
 @app.post("/v1/engines/copilot-codex/completions")
```
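For orientation, both completion routes accept the same OpenAI-style payload defined by `OpenAIinput` (next file). A hypothetical local call, assuming the proxy is listening on port 5000 as in the repo's test helpers:

```python
import requests  # third-party; pip install requests

# Hypothetical request for illustration; the field names come from OpenAIinput.
resp = requests.post(
    "http://localhost:5000/v1/engines/codegen/completions",
    json={"model": "fastertransformer", "prompt": "def hello():", "max_tokens": 16},
)
print(resp.json())
```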
(copilot_proxy request model; file name not shown in the mirror)

```diff
@@ -2,9 +2,11 @@ from typing import Optional, Union
 
 from pydantic import BaseModel, constr
 
+ModelType = constr(regex="^(fastertransformer|py-model)$")
+
 
 class OpenAIinput(BaseModel):
-    model: constr(regex="^(fastertransformer|py-model)$") = "fastertransformer"
+    model: ModelType = "fastertransformer"
     prompt: Optional[str]
     suffix: Optional[str]
     max_tokens: Optional[int] = 16
@@ -20,4 +22,3 @@ class OpenAIinput(BaseModel):
     best_of: Optional[int] = 1
     logit_bias: Optional[dict]
     user: Optional[str]
-
```
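Extracting the `constr` into a named `ModelType` alias keeps the field annotation readable without changing validation. A short sketch of the constraint in action, assuming pydantic v1 (where `constr(regex=...)` is the supported spelling):

```python
from pydantic import BaseModel, ValidationError, constr

ModelType = constr(regex="^(fastertransformer|py-model)$")


class OpenAIinput(BaseModel):
    model: ModelType = "fastertransformer"


print(OpenAIinput(model="py-model").model)  # accepted: py-model
try:
    OpenAIinput(model="gpt-4")  # rejected: does not match the regex
except ValidationError as err:
    print(err.errors()[0]["type"])  # value_error.str.regex
```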
(copilot_proxy error type; file name not shown in the mirror)

```diff
@@ -1,10 +1,12 @@
-from typing import *
+from typing import Optional
 
 
 class FauxPilotException(Exception):
-    def __init__(self, message: str, type: Optional[str] = None, param: Optional[str] = None, code: Optional[int] = None):
+    def __init__(self, message: str, error_type: Optional[str] = None, param: Optional[str] = None,
+                 code: Optional[int] = None):
         super().__init__(message)
         self.message = message
-        self.type = type
+        self.error_type = error_type
         self.param = param
         self.code = code
 
@@ -12,7 +14,7 @@ class FauxPilotException(Exception):
         return {
             'error': {
                 'message': self.message,
-                'type': self.type,
+                'type': self.error_type,
                 'param': self.param,
                 'code': self.code
             }
```
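Renaming the attribute to `error_type` avoids shadowing the built-in `type` while the serialized payload keeps its external `'type'` key, so API clients see an unchanged error shape. A self-contained sketch using the class as defined in the diff (the argument values are hypothetical):

```python
from typing import Optional


class FauxPilotException(Exception):
    def __init__(self, message: str, error_type: Optional[str] = None, param: Optional[str] = None,
                 code: Optional[int] = None):
        super().__init__(message)
        self.message = message
        self.error_type = error_type
        self.param = param
        self.code = code

    def json(self):
        return {
            'error': {
                'message': self.message,
                'type': self.error_type,
                'param': self.param,
                'code': self.code
            }
        }


exc = FauxPilotException("invalid model requested", error_type="invalid_request_error",
                         param="model", code=400)
print(exc.json())
# {'error': {'message': 'invalid model requested', 'type': 'invalid_request_error',
#            'param': 'model', 'code': 400}}
```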
(docker-compose service definitions; file name not shown in the mirror)

```diff
@@ -23,7 +23,6 @@ services:
   copilot_proxy:
     # For dockerhub version
     # image: moyix/copilot_proxy:latest
-    # command: python3 -m flask run --host=0.0.0.0 --port=5000
     # For local build
     build:
       context: .
```
setup.cfg (new file, 3 lines)
```diff
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 120
+exclude = venv
```
(Python backend test helpers; file name not shown in the mirror)

```diff
@@ -27,6 +27,7 @@ def setup_module():
     if root.joinpath(".env").exists():
         shutil.move(str(root.joinpath(".env")), str(root.joinpath(".env.bak")))
 
+
 def teardown_module():
     """
     Teardown steps for tests in this module
@@ -43,6 +44,7 @@ def teardown_module():
             f"Exception: {exc}"
         )
 
+
 def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5) -> str:
     """
     Helper function to enter input for a given prompt. Returns consumed output.
@@ -61,6 +63,7 @@ def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5
     proc.sendline(input_s)
     return after
 
+
 def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:
     """
     Helper function to run common setup steps.
@@ -78,6 +81,7 @@ def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:
 
     return proc
 
+
 def load_test_env():
     """
     Load test env vars
@@ -90,6 +94,7 @@ def load_test_env():
         env[key] = val
     return env
 
+
 def run_inference(
         prompt: str, model: str = "py-model", port: int = 5000, return_all: bool = False,
         **kwargs
```