Update with support for CodeGen 2B models

2025-08-20 21:34:13 -07:00 · 2022-08-03 16:14:12 -04:00 · 2022-08-03 16:14:12 -04:00 · 89fdf6a727
commit 89fdf6a727
parent 695d77c27e
2 changed files with 21 additions and 15 deletions
--- a/README.md
+++ b/README.md
@@ -18,18 +18,20 @@ lmao

 ## Setup

-Run the setup script to choose a model to use. This will download the model from Huggingface and then convert it for use with FasterTransformer. Right now the 2B model is not available because of a [hard-coded check](https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/layers/attention_layers/DecoderSelfAttentionLayer.cc#L231-L232) in FasterTransformer that excludes it; hopefully this can be fixed soon!
+Run the setup script to choose a model to use. This will download the model from Huggingface and then convert it for use with FasterTransformer.

 ```
 $ ./setup.sh 
 Models available:
 [1] codegen-350M-mono (2GB total VRAM required; Python-only)
 [2] codegen-350M-multi (2GB total VRAM required; multi-language)
-[3] codegen-6B-mono (13GB total VRAM required; Python-only)
-[4] codegen-6B-multi (13GB total VRAM required; multi-language)
-[5] codegen-16B-mono (32GB total VRAM required; Python-only)
-[6] codegen-16B-multi (32GB total VRAM required; multi-language)
-Enter your choice [4]: 2
+[3] codegen-2B-mono (7GB total VRAM required; Python-only)
+[4] codegen-2B-multi (7GB total VRAM required; multi-language)
+[5] codegen-6B-mono (13GB total VRAM required; Python-only)
+[6] codegen-6B-multi (13GB total VRAM required; multi-language)
+[7] codegen-16B-mono (32GB total VRAM required; Python-only)
+[8] codegen-16B-multi (32GB total VRAM required; multi-language)
+Enter your choice [6]: 2
 Enter number of GPUs [1]: 1
 Where do you want to save the model [/home/moyix/git/fauxpilot/models]? /fastdata/mymodels
 Downloading and converting the model, this will take a while...
--- a/setup.sh
+++ b/setup.sh
@@ -9,21 +9,25 @@ fi
 echo "Models available:"
 echo "[1] codegen-350M-mono (2GB total VRAM required; Python-only)"
 echo "[2] codegen-350M-multi (2GB total VRAM required; multi-language)"
-echo "[3] codegen-6B-mono (13GB total VRAM required; Python-only)"
-echo "[4] codegen-6B-multi (13GB total VRAM required; multi-language)"
-echo "[5] codegen-16B-mono (32GB total VRAM required; Python-only)"
-echo "[6] codegen-16B-multi (32GB total VRAM required; multi-language)"
+echo "[3] codegen-2B-mono (7GB total VRAM required; Python-only)"
+echo "[4] codegen-2B-multi (7GB total VRAM required; multi-language)"
+echo "[5] codegen-6B-mono (13GB total VRAM required; Python-only)"
+echo "[6] codegen-6B-multi (13GB total VRAM required; multi-language)"
+echo "[7] codegen-16B-mono (32GB total VRAM required; Python-only)"
+echo "[8] codegen-16B-multi (32GB total VRAM required; multi-language)"
 # Read their choice
-read -p "Enter your choice [4]: " MODEL_NUM
+read -p "Enter your choice [6]: " MODEL_NUM

 # Convert model number to model name
 case $MODEL_NUM in
    1) MODEL="codegen-350M-mono" ;;
    2) MODEL="codegen-350M-multi" ;;
-    3) MODEL="codegen-6B-mono" ;;
-    4) MODEL="codegen-6B-multi" ;;
-    5) MODEL="codegen-16B-mono" ;;
-    6) MODEL="codegen-16B-multi" ;;
+    3) MODEL="codegen-2B-mono" ;;
+    4) MODEL="codegen-2B-multi" ;;
+    5) MODEL="codegen-6B-mono" ;;
+    6) MODEL="codegen-6B-multi" ;;
+    7) MODEL="codegen-16B-mono" ;;
+    8) MODEL="codegen-16B-multi" ;;
    *) MODEL="codegen-6B-multi" ;;
 esac