mirror of
https://github.com/fauxpilot/fauxpilot.git
synced 2025-08-20 21:34:13 -07:00
Add python backend support
- Modify dockerfile to include bitsandbytes, transformers and latest version of pytorch - Minor modifications in utils/codegen.py so that same client works with FT and Py-backend - Minor modifications in launch.sh (no need to name models by GPU) - Add installation script for adding a new python model (with super simple config_template) - Modify setup.sh so that it aworks with both FT and Python backend models Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
This commit is contained in:
parent
9b2bc84670
commit
01f1cbb629
9 changed files with 487 additions and 73 deletions
180
python_backend/config_template.pbtxt
Normal file
180
python_backend/config_template.pbtxt
Normal file
|
@ -0,0 +1,180 @@
|
|||
name: "py-model"
|
||||
backend: "python"
|
||||
max_batch_size: 4
|
||||
input [
|
||||
{
|
||||
name: "input_ids"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ -1 ]
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "start_id"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "end_id"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
name: "input_lengths"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
},
|
||||
{
|
||||
name: "request_output_len"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ -1 ]
|
||||
},
|
||||
{
|
||||
name: "runtime_top_k"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
name: "runtime_top_p"
|
||||
data_type: TYPE_FP32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "beam_search_diversity_rate"
|
||||
data_type: TYPE_FP32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
name: "temperature"
|
||||
data_type: TYPE_FP32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "len_penalty"
|
||||
data_type: TYPE_FP32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "repetition_penalty"
|
||||
data_type: TYPE_FP32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "random_seed"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "is_return_log_probs"
|
||||
data_type: TYPE_BOOL
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "beam_width"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 1 ]
|
||||
reshape: { shape: [ ] }
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "bad_words_list"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 2, -1 ]
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
# UNUSED
|
||||
name: "stop_words_list"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ 2, -1 ]
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
output [
|
||||
{
|
||||
name: "output_ids"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ -1, -1 ]
|
||||
},
|
||||
{
|
||||
name: "sequence_length"
|
||||
data_type: TYPE_INT32
|
||||
dims: [ -1 ]
|
||||
} #,
|
||||
# Following is currently unsupported, but should be supported in the future
|
||||
# {
|
||||
# name: "cum_log_probs"
|
||||
# data_type: TYPE_FP32
|
||||
# dims: [ -1 ]
|
||||
# },
|
||||
# {
|
||||
# name: "output_log_probs"
|
||||
# data_type: TYPE_FP32
|
||||
# dims: [ -1, -1 ]
|
||||
# }
|
||||
]
|
||||
# unsure what this is for
|
||||
instance_group [
|
||||
{
|
||||
count: 1
|
||||
kind: KIND_CPU
|
||||
}
|
||||
]
|
||||
parameters {
|
||||
key: "use_half"
|
||||
value: {
|
||||
string_value: "1"
|
||||
}
|
||||
}
|
||||
parameters {
|
||||
key: "model_name"
|
||||
value: {
|
||||
string_value: "${model_name}" # e.g. "codegen-350M-multi"
|
||||
}
|
||||
}
|
||||
parameters {
|
||||
key: "org_name"
|
||||
value: {
|
||||
string_value: "${org_name}" # e.g. "Salesforce"
|
||||
}
|
||||
}
|
||||
parameters {
|
||||
key: "use_int8",
|
||||
value: {
|
||||
string_value: "${use_int8}" # e.g. "0" or "1"
|
||||
}
|
||||
}
|
||||
parameters {
|
||||
key: "use_auto_device_map",
|
||||
value: {
|
||||
string_value: "${use_auto_device_map}" # e.g. "0" or "1"
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue