Mirror of https://github.com/fauxpilot/fauxpilot.git, synced 2025-07-16 10:03:25 -07:00
Dev (#148)

* Create publish-docker-images.yaml
* Add copilot_proxy publishing
* Add model_converter publishing
* Use dockerhub version
* Do not login for PRs
* Overwrite some of the labels' values
* Move ignore files to the root of `context`
* Add comments & fix some issues
* Fix typos
* Remove the target of the master branch
* Delete .dockerignore
* Add Flake8 and format code accordingly
* Iterate on the PR template, fix the token for the contributor action
* Remove converter image build
* Update Dockerfile of proxy
* Comment out proxy image in compose
* Fix build action

Co-authored-by: Fred de Gier <freddegier@me.com>
Co-authored-by: Rowe Wilson Frederisk Holme <frederisk@outlook.com>
parent 1bb0e53117, commit 283668448d
13 changed files with 198 additions and 67 deletions
.github/PULL_REQUEST_TEMPLATE.md | 27 (vendored)

@@ -1,18 +1,12 @@
----
 # [Template] PR Description

 In general, the github system duplicates your commit message automatically for your convenience.
 After composing your own PR description using this template, please remove any unneeded portions.
-```bash
 ## 1. General Description
-The commit title must begin with one of the eleven given options.
-Build, chore, CI, documentation, task, fix, performance, refactor, revert, style, and test are some examples.
-For more details, please see [HERE](https://www.conventionalcommits.org/en/v1.0.0/).
-Summarize changes in no more than 50 characters ASAP for readability and maintenance.
+<!-- Describe what this PR intends to do -->


 ## 2. Changes proposed in this PR:
-- Bulleted lists are also acceptable.
-- Typically, a hyphen or asterisk before the bullet, followed by a single space.
+<!-- Bulleted lists are also acceptable. Typically, a hyphen or asterisk before the bullet, followed by a single space.-->
+1.
+1.

 Resolves: #{GitHub-Issue-Number}
 See also: #{GitHub-Issue-Number}

@@ -20,8 +14,9 @@ See also: #{GitHub-Issue-Number}

 ## 3. How to evaluate:
 1. Describe how to evaluate such that it may be reproduced by the reviewer(s).
-2. **Self assessment:**
-   - Build test: [ ]Passed [ ]Failed [*]Skipped
-   - Run test: [ ]Passed [ ]Failed [* ]Skipped
-```
+1.
+1.
+1. Self assessment:
+   - [ ] Successfully build locally `docker-compose build`:
+   - [ ] Successfully tested the full solution locally
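For illustration, a hypothetical commit title following the conventionalcommits.org pattern the removed text describes would look like:

```
fix: strip trailing whitespace from completion responses
```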
.github/workflows/lint.yml | 17 (new file, vendored)

@@ -0,0 +1,17 @@
name: flake8 Lint

on: [push, pull_request]

jobs:
  flake8-lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
      - name: Check out source repository
        uses: actions/checkout@v3
      - name: Set up Python environment
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: flake8 Lint
        uses: py-actions/flake8@v2
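For a local approximation of this job, here is a minimal sketch, assuming flake8 is installed in the current environment (`pip install flake8`); the `py-actions/flake8` action likewise runs flake8 against the checked-out repository:

```python
import subprocess
import sys

# Run flake8 over the repository root, mirroring the CI lint job above.
# flake8 picks up its settings from setup.cfg (added later in this commit).
result = subprocess.run(
    [sys.executable, "-m", "flake8", "."],
    capture_output=True,
    text=True,
)
print(result.stdout, end="")
sys.exit(result.returncode)
```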
.github/workflows/publish-docker-images.yaml | 94 (new file, vendored)

@@ -0,0 +1,94 @@
name: Publish Docker Images

on:
  # `push` will build and publish the image with a tag corresponding to the branch name (such as `main`, `master`).
  # `push tag` will build and publish the image with a tag of the same name as the git tag (such as `v1.2.3`, `v0.0.1-alpha.1`) plus a tag named `latest`.
  push:
    branches: [main]
  # `pull_request` will only build the image to test it, not publish it.
  pull_request:
    # The branches below must be a subset of the branches above.
    branches: [main]
  # `release publish` is almost the same as `push tag`.
  release:
    types: [published]

jobs:
  publish_copilot_proxy:
    name: Publish copilot_proxy
    # The `-latest` runner images are the latest **stable** images that GitHub provides.
    # Here it is equivalent to `ubuntu-20.04`.
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Extract Docker metadata
        id: meta  # referenced as `steps.meta` by the build step below
        uses: docker/metadata-action@v4
        with:
          # Define image name
          images: moyix/copilot_proxy
          # Overwrite labels
          labels: |
            org.opencontainers.image.title=CopilotProxy
            org.opencontainers.image.description=A simple proxy that enables triton to send back copilot compatible communication content.
            org.opencontainers.image.url=https://github.com/moyix/fauxpilot/tree/main/copilot_proxy
            org.opencontainers.image.source=https://github.com/moyix/fauxpilot/tree/main/copilot_proxy

      - name: Login to Docker Hub
        # Do not login for PRs.
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and push images
        uses: docker/build-push-action@v3
        with:
          # For PRs, just build and do not push.
          push: ${{ github.event_name != 'pull_request' }}
          context: .
          file: proxy.Dockerfile
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  # publish_model_converter:
  #   name: Publish model_converter
  #   # The `-latest` runner images are the latest **stable** images that GitHub provides.
  #   # Here it is equivalent to `ubuntu-20.04`.
  #   runs-on: ubuntu-latest

  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v3

  #     - name: Extract Docker metadata
  #       uses: docker/metadata-action@v4
  #       with:
  #         # Define image name
  #         images: moyix/model_converter
  #         # Overwrite labels
  #         labels: |
  #           org.opencontainers.image.title=ModelConverter
  #           org.opencontainers.image.description=Convert the model to GPT-J to adapt the FasterTransformer Backend.
  #           org.opencontainers.image.url=https://github.com/moyix/fauxpilot/tree/main/converter
  #           org.opencontainers.image.source=https://github.com/moyix/fauxpilot/tree/main/converter

  #     - name: Login to Docker Hub
  #       # Do not login for PRs.
  #       if: github.event_name != 'pull_request'
  #       uses: docker/login-action@v2
  #       with:
  #         username: ${{ secrets.DOCKERHUB_USERNAME }}
  #         password: ${{ secrets.DOCKERHUB_TOKEN }}

  #     - name: Build and push images
  #       uses: docker/build-push-action@v3
  #       with:
  #         # For PRs, just build and do not push.
  #         push: ${{ github.event_name != 'pull_request' }}
  #         context: ./converter
  #         tags: ${{ steps.meta.outputs.tags }}
  #         labels: ${{ steps.meta.outputs.labels }}
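Concretely, per the trigger comments above and docker/metadata-action's default tag rules (versions here are hypothetical): a push to `main` publishes `moyix/copilot_proxy:main`; pushing a tag such as `v1.2.3`, or publishing a release for it, publishes `moyix/copilot_proxy:v1.2.3` plus `moyix/copilot_proxy:latest`; a pull request only builds the image and skips both the registry login and the push.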
@@ -13,7 +13,7 @@ jobs:
       - name: 'Greet the contributor'
         uses: garg3133/welcome-new-contributors@v1.2
         with:
-          token: ${{ secrets.BOT_ACCESS_TOKEN }}
+          token: ${{ secrets.GITHUB_TOKEN }}
           is-oauth-token: true
           issue-message: 'Hello there, thanks for opening your first issue. We
             welcome you to the FauxPilot community!'
@@ -4,14 +4,14 @@ import argparse
 import torch
 from transformers import GPTJForCausalLM, GPTJConfig
 # Note: these need the git version of Transformers as of 7/22/2022
-from transformers import CodeGenTokenizer, CodeGenForCausalLM
+from transformers import CodeGenTokenizer, CodeGenForCausalLM  # noqa: F401
 from transformers import CODEGEN_PRETRAINED_MODEL_ARCHIVE_LIST

 parser = argparse.ArgumentParser('Convert SalesForce CodeGen model to GPT-J')
 parser.add_argument('--code_model',
                     choices=CODEGEN_PRETRAINED_MODEL_ARCHIVE_LIST, default='Salesforce/codegen-350M-multi',
                     help='which SalesForce model to convert'
-)
+                    )
 parser.add_argument('output_dir', help='where to store the converted model')
 args = parser.parse_args()

@@ -47,13 +47,16 @@ config.tokenizer_class = 'CodeGenTokenizer'
 gptj_model = GPTJForCausalLM(config).half()
 embed_dim = config.n_embd

-def replace(model, weights, name):
-    model.state_dict()[name].copy_(weights.detach())
+
+def replace(model, weights, model_name):
+    model.state_dict()[model_name].copy_(weights.detach())
+

 def replace_by_name(dest_model, src_model, old_name, new_name):
     assert old_name in src_model.state_dict()
     assert new_name in dest_model.state_dict()
-    replace(dest_model, src_model.state_dict()[old_name], new_name)
+    replace(model=dest_model, weights=src_model.state_dict()[old_name], model_name=new_name)
+

 print('Converting...')
 # Copy weights from CodeGen model

@@ -72,9 +75,9 @@ with torch.no_grad():
             # After a great deal of pain, I figured out that this permutation on
             # the weights of the qkv_proj fixes it.
             base_permutation = [0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11]
-            permutation = torch.cat([torch.arange(i*local_dim, (i+1)*local_dim) for i in base_permutation])
+            permutation = torch.cat([torch.arange(i * local_dim, (i + 1) * local_dim) for i in base_permutation])
             # NB: we permute the *rows* here because the computation is xA.T
-            new_qkv_proj = qkv_proj[permutation,:]
+            new_qkv_proj = qkv_proj[permutation, :]
             # NB: the name QKV is misleading here; they are actually stored in
             # the order QVK
             query, value, key = torch.split(new_qkv_proj, embed_dim, dim=0)

@@ -82,7 +85,7 @@ with torch.no_grad():
             replace(gptj_model, key, name.replace('qkv_proj', 'k_proj'))
             replace(gptj_model, value, name.replace('qkv_proj', 'v_proj'))
         else:
-            replace_by_name(gptj_model, cg_model, name, name)
+            replace_by_name(dest_model=gptj_model, src_model=cg_model, old_name=name, new_name=name)

 print('Conversion complete.')
 print(f"Saving model to {args.output_dir}...")
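The permutation comment above is the subtle part of this conversion. Below is a toy, self-contained sketch with hypothetical sizes (the real script derives `embed_dim` from the model config) showing how the interleaved CodeGen `qkv_proj` rows are regrouped and then split in QVK order:

```python
import torch

# Toy dimensions only: the 12-entry permutation covers 3 matrices (Q, K, V)
# times 4 interleaved groups, so each block is (3 * embed_dim) / 12 rows.
embed_dim = 12
local_dim = embed_dim // 4

# Fused projection with 3 * embed_dim rows, as in the CodeGen checkpoint.
qkv_proj = torch.randn(3 * embed_dim, embed_dim)

# Same permutation as the converter: gather the interleaved groups into
# contiguous row blocks (note the QVK order, per the converter's comment).
base_permutation = [0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11]
permutation = torch.cat(
    [torch.arange(i * local_dim, (i + 1) * local_dim) for i in base_permutation]
)

# Permute *rows*, because the forward computation is x @ A.T.
new_qkv_proj = qkv_proj[permutation, :]
query, value, key = torch.split(new_qkv_proj, embed_dim, dim=0)
print(query.shape, value.shape, key.shape)  # three (12, 12) tensors
```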
@@ -23,10 +23,12 @@ import torch
 import os
 import sys
 from transformers import GPTJForCausalLM

 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(dir_path + "/../../../..")
 sys.path.append(dir_path)
+
+
 def get_weight_data_type(data_type):
     if data_type == "fp32":
         return np.float32

@@ -35,8 +37,8 @@ def get_weight_data_type(data_type):
     else:
         assert False, f"Invalid weight data type {data_type}"

-def split_and_convert_process(i, saved_dir,factor,key,args, val):
+
+def split_and_convert_process(i, saved_dir, factor, key, val):
     if key.find("input_layernorm.weight") != -1 or key.find("input_layernorm.bias") != -1 or \
             key.find("attention.dense.bias") != -1 or key.find("post_attention_layernorm.weight") != -1 or \
             key.find("post_attention_layernorm.bias") != -1 or key.find("mlp.dense_4h_to_h.bias") != -1 or \

@@ -70,16 +72,16 @@ def split_and_convert_process(i, saved_dir,factor,key,args, val):
     else:
         print("[ERROR] cannot find key '{}'".format(key))


 def split_and_convert(args):
     saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num

-    if(os.path.exists(saved_dir) == False):
+    if os.path.exists(saved_dir) is False:
         os.makedirs(saved_dir)
     ckpt_name = args.in_file

     t_gpu_num = args.trained_gpu_num
     i_gpu_num = args.infer_gpu_num
-    assert(i_gpu_num % t_gpu_num == 0)
+    assert (i_gpu_num % t_gpu_num == 0)

     factor = (int)(i_gpu_num / t_gpu_num)

@@ -93,10 +95,10 @@ def split_and_convert(args):
         for k, v in vars(model.config).items():
             config["gpt"][k] = f"{v}"
         config["gpt"]["weight_data_type"] = args.weight_data_type
-        with open((Path(saved_dir) / f"config.ini").as_posix(), 'w') as configfile:
+        with open((Path(saved_dir) / "config.ini").as_posix(), 'w') as configfile:
             config.write(configfile)
-    except:
-        print(f"Fail to save the config in config.ini.")
+    except Exception:
+        print("Fail to save the config in config.ini.")
     np_weight_data_type = get_weight_data_type(args.weight_data_type)

     huggingface_model_name_pattern = [

@@ -130,9 +132,11 @@ def split_and_convert(args):
         if name == 'transformer.wte.weight':
             param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.wte.bin")
         elif name == 'transformer.ln_f.bias':
-            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.final_layernorm.bias.bin")
+            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
+                saved_dir + "model.final_layernorm.bias.bin")
         elif name == 'transformer.ln_f.weight':
-            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.final_layernorm.weight.bin")
+            param.detach().cpu().numpy().astype(np_weight_data_type).tofile(
+                saved_dir + "model.final_layernorm.weight.bin")
         elif name == 'lm_head.weight':
             param.detach().cpu().numpy().astype(np_weight_data_type).tofile(saved_dir + "model.lm_head.weight.bin")
         elif name == 'lm_head.bias':

@@ -156,12 +160,12 @@ def split_and_convert(args):
         weights = param.detach().cpu().numpy().astype(np_weight_data_type)

         # Some weights need to be transposed
-        if name.find("mlp.fc_in.weight") != -1 or \
-                name.find("mlp.fc_out.weight") != -1 or \
+        if name.find("mlp.fc_in.weight") != -1 or name.find("mlp.fc_out.weight") != -1 or \
                 name.find("attn.out_proj.weight") != -1:
             weights = weights.T

-        new_name = name.replace("transformer.h.", "layers.").replace(huggingface_model_name_pattern[i], ft_model_name_pattern[i])
+        new_name = name.replace("transformer.h.", "layers.").replace(huggingface_model_name_pattern[i],
+                                                                     ft_model_name_pattern[i])

         pool.starmap(split_and_convert_process,
                      [(0, saved_dir, factor, new_name, args,

@@ -170,14 +174,17 @@ def split_and_convert(args):
     pool.close()
     pool.join()


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
     parser.add_argument('-saved_dir', '-o', type=str, help='file name of output file', required=True)
     parser.add_argument('-in_file', '-i', type=str, help='HF model name or directory', required=True)
     parser.add_argument('-trained_gpu_num', '-t_g', type=int, help='How many gpus for training', default=1)
     parser.add_argument('-infer_gpu_num', '-i_g', type=int, help='How many gpus for inference', required=True)
-    parser.add_argument("-processes", "-p", type=int, help="How many processes to spawn for conversion (default: 4)", default=4)
-    parser.add_argument("-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"], help="output weight data type")
+    parser.add_argument("-processes", "-p", type=int, help="How many processes to spawn for conversion (default: 4)",
+                        default=4)
+    parser.add_argument("-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"],
+                        help="output weight data type")

     args = parser.parse_args()
     print("\n=============== Argument ===============")
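As a small illustration of the transpose rule in the hunk above, here is a minimal sketch with made-up shapes, a hypothetical key, and a temp path, not the converter itself:

```python
import numpy as np

# FasterTransformer expects fc_in/fc_out and attn.out_proj weights transposed
# relative to the Hugging Face layout before they are dumped with .tofile().
name = "transformer.h.0.mlp.fc_in.weight"   # hypothetical key
weights = np.arange(6, dtype=np.float32).reshape(2, 3)

if name.find("mlp.fc_in.weight") != -1 or name.find("mlp.fc_out.weight") != -1 or \
        name.find("attn.out_proj.weight") != -1:
    weights = weights.T

# Raw fp32 dump, the on-disk format the conversion produces.
weights.tofile("/tmp/model.layers.0.mlp.fc_in.weight.bin")
```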
@@ -6,10 +6,12 @@ from string import Template
 from transformers import GPTJConfig, AutoTokenizer
 import torch

+
 def round_up(x, multiple):
     remainder = x % multiple
     return x if remainder == 0 else x + multiple - remainder

+
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 CONFIG_TEMPLATE_PATH = os.path.join(SCRIPT_DIR, 'config_template.pbtxt')
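One worked case of `round_up` (illustrative numbers only): 50257 % 1024 = 81, so the value is padded to 50257 + 1024 - 81 = 51200, while already-aligned values pass through unchanged.

```python
def round_up(x, multiple):
    remainder = x % multiple
    return x if remainder == 0 else x + multiple - remainder


assert round_up(50257, 1024) == 51200  # padded up to the next multiple
assert round_up(51200, 1024) == 51200  # already a multiple: unchanged
```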
@@ -27,6 +27,7 @@ app = FastAPI(
     swagger_ui_parameters={"defaultModelsExpandDepth": -1}
 )

+
 @app.exception_handler(FauxPilotException)
 async def fauxpilot_handler(request: Request, exc: FauxPilotException):
     return JSONResponse(

@@ -34,6 +35,7 @@ async def fauxpilot_handler(request: Request, exc: FauxPilotException):
         content=exc.json()
     )

+
 # Used to support copilot.vim
 @app.get("/copilot_internal/v2/token")
 def get_copilot_token():

@@ -43,6 +45,7 @@ def get_copilot_token():
         content=content
     )

+
 @app.post("/v1/engines/codegen/completions")
 # Used to support copilot.vim
 @app.post("/v1/engines/copilot-codex/completions")
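For reference, a minimal client call against the completions route registered above; this is a sketch that assumes a FauxPilot stack running locally with the proxy reachable on port 5000:

```python
import requests

resp = requests.post(
    "http://localhost:5000/v1/engines/codegen/completions",
    json={
        "model": "fastertransformer",  # must satisfy the ModelType regex below
        "prompt": "def hello_world():",
        "max_tokens": 16,
    },
    timeout=30,
)
print(resp.status_code, resp.json())
```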
@@ -2,9 +2,11 @@ from typing import Optional, Union

 from pydantic import BaseModel, constr

+ModelType = constr(regex="^(fastertransformer|py-model)$")
+

 class OpenAIinput(BaseModel):
-    model: constr(regex="^(fastertransformer|py-model)$") = "fastertransformer"
+    model: ModelType = "fastertransformer"
     prompt: Optional[str]
     suffix: Optional[str]
     max_tokens: Optional[int] = 16

@@ -20,4 +22,3 @@ class OpenAIinput(BaseModel):
     best_of: Optional[int] = 1
     logit_bias: Optional[dict]
     user: Optional[str]
-
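Pulling the constrained string out into a `ModelType` alias keeps validation identical; a quick sketch of the behavior, assuming pydantic v1 (which the `regex=` keyword implies):

```python
from pydantic import BaseModel, ValidationError, constr

ModelType = constr(regex="^(fastertransformer|py-model)$")


class OpenAIinput(BaseModel):
    model: ModelType = "fastertransformer"


print(OpenAIinput().model)                  # fastertransformer (the default)
print(OpenAIinput(model="py-model").model)  # py-model

try:
    OpenAIinput(model="gpt-4")              # fails the regex constraint
except ValidationError as err:
    print(err.errors()[0]["loc"])           # ('model',)
```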
@@ -1,10 +1,12 @@
-from typing import *
+from typing import Optional


 class FauxPilotException(Exception):
-    def __init__(self, message: str, type: Optional[str] = None, param: Optional[str] = None, code: Optional[int] = None):
+    def __init__(self, message: str, error_type: Optional[str] = None, param: Optional[str] = None,
+                 code: Optional[int] = None):
         super().__init__(message)
         self.message = message
-        self.type = type
+        self.error_type = error_type
         self.param = param
         self.code = code

@@ -12,7 +14,7 @@ class FauxPilotException(Exception):
         return {
             'error': {
                 'message': self.message,
-                'type': self.type,
+                'type': self.error_type,
                 'param': self.param,
                 'code': self.code
             }
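To see the rename end to end, here is a compact restatement of the class with the same fields as the hunks above (example values are hypothetical); the wire format still emits `'type'`, while the attribute no longer shadows the `type` builtin:

```python
from typing import Optional


class FauxPilotException(Exception):
    def __init__(self, message: str, error_type: Optional[str] = None,
                 param: Optional[str] = None, code: Optional[int] = None):
        super().__init__(message)
        self.message = message
        self.error_type = error_type
        self.param = param
        self.code = code

    def json(self):
        return {'error': {'message': self.message, 'type': self.error_type,
                          'param': self.param, 'code': self.code}}


exc = FauxPilotException("model not found", error_type="invalid_request_error",
                         param="model", code=404)
print(exc.json())
```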
@@ -23,7 +23,6 @@ services:
   copilot_proxy:
     # For dockerhub version
     # image: moyix/copilot_proxy:latest
-    # command: python3 -m flask run --host=0.0.0.0 --port=5000
     # For local build
     build:
       context: .
setup.cfg | 3 (new file)

@@ -0,0 +1,3 @@
[flake8]
max-line-length = 120
exclude = venv
@@ -27,6 +27,7 @@ def setup_module():
     if root.joinpath(".env").exists():
         shutil.move(str(root.joinpath(".env")), str(root.joinpath(".env.bak")))

+
 def teardown_module():
     """
     Teardown steps for tests in this module

@@ -43,6 +44,7 @@ def teardown_module():
         f"Exception: {exc}"
     )

+
 def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5) -> str:
     """
     Helper function to enter input for a given prompt. Returns consumed output.

@@ -61,6 +63,7 @@ def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5
     proc.sendline(input_s)
     return after

+
 def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:
     """
     Helper function to run common setup steps.

@@ -78,6 +81,7 @@ def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:

     return proc

+
 def load_test_env():
     """
     Load test env vars

@@ -90,6 +94,7 @@ def load_test_env():
     env[key] = val
     return env

+
 def run_inference(
         prompt: str, model: str = "py-model", port: int = 5000, return_all: bool = False,
         **kwargs
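The `enter_input` helper these hunks space out is the core of the setup tests; below is a self-contained sketch of the same pattern, with a hypothetical child process and prompt rather than the real setup script:

```python
import pexpect


def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5) -> str:
    """Wait for `expect`, answer with `input_s`, return the matched output."""
    proc.expect(expect, timeout=timeout)
    after = str(proc.after)
    proc.sendline(input_s)
    return after


child = pexpect.spawn("python3", ["-c", "print(input('Continue? '))"],
                      encoding="utf-8")
print(enter_input(child, r"Continue\? ", "yes"))
```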