Download preconverted models from HuggingFace when possible

I added the 1- and 2-GPU versions of the converted models to HuggingFace
so that in the most common case we don't have to run the (very
RAM-intensive) conversion process at all. Hopefully HuggingFace won't
mind us hosting them there!

This should fix #7.
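
For reference, the fast path added below amounts to a curl against the moyix
HuggingFace repos followed by a zstd/tar extraction. A minimal manual
equivalent, assuming MODEL=codegen-350M-multi and NUM_GPUS=1 (both purely
illustrative values) and that curl and zstd are installed:

    # Sketch of a manual download of a preconverted model; the model name,
    # GPU count, and target directory here are example values only.
    MODEL=codegen-350M-multi
    NUM_GPUS=1
    MODEL_DIR="$(pwd)/models"
    ARCHIVE="${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu.tar.zst"
    mkdir -p "${MODEL_DIR}"
    curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
        -o "$ARCHIVE"
    # Decompress and unpack the archive into the model directory.
    zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
    rm -f "$ARCHIVE"

The script itself additionally copies the matching directory out of
converter/models/ in the repo checkout, which this sketch skips.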
commit 9363bcd586
Brendan Dolan-Gavitt, 2022-08-06 00:05:39 -04:00

@@ -55,6 +55,19 @@ fi
 # Create model directory
 mkdir -p "${MODEL_DIR}"
-echo "Downloading and converting the model, this will take a while..."
-docker run --rm -v ${MODEL_DIR}:/models -e MODEL=${MODEL} -e NUM_GPUS=${NUM_GPUS} moyix/model_converter:latest
+# For some of the models we can download them preconverted.
+if [ $NUM_GPUS -le 2 ]; then
+    echo "Downloading the model from HuggingFace, this will take a while..."
+    SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+    DEST="${MODEL}-${NUM_GPUS}gpu"
+    ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
+    cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
+    curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
+        -o "$ARCHIVE"
+    zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
+    rm -f "$ARCHIVE"
+else
+    echo "Downloading and converting the model, this will take a while..."
+    docker run --rm -v ${MODEL_DIR}:/models -e MODEL=${MODEL} -e NUM_GPUS=${NUM_GPUS} moyix/model_converter:latest
+fi
 echo "Done! Now run ./launch.sh to start the FauxPilot server."