Download preconverted models from HuggingFace when possible

I added the 1- and 2-GPU versions of the converted models to HuggingFace
so that in the most common case we don't have to run the (very
RAM-intensive) conversion process at all. Hopefully HuggingFace won't
mind us hosting them there!

This should fix #7.
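
For reference, the fast path added below amounts to a curl against the moyix
HuggingFace repos followed by a zstd/tar extraction. A minimal manual
equivalent, assuming MODEL=codegen-350M-multi and NUM_GPUS=1 (both purely
illustrative values) and that curl and zstd are installed:

    # Sketch of a manual download of a preconverted model; the model name,
    # GPU count, and target directory here are example values only.
    MODEL=codegen-350M-multi
    NUM_GPUS=1
    MODEL_DIR="$(pwd)/models"
    ARCHIVE="${MODEL_DIR}/${MODEL}-${NUM_GPUS}gpu.tar.zst"
    mkdir -p "${MODEL_DIR}"
    curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
        -o "$ARCHIVE"
    # Decompress and unpack the archive into the model directory.
    zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
    rm -f "$ARCHIVE"

The script itself additionally copies the matching directory out of
converter/models/ in the repo checkout, which this sketch skips.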
commit 9363bcd586
Brendan Dolan-Gavitt, 2022-08-06 00:05:39 -04:00

@@ -55,6 +55,19 @@ fi
 # Create model directory
 mkdir -p "${MODEL_DIR}"
-echo "Downloading and converting the model, this will take a while..."
-docker run --rm -v ${MODEL_DIR}:/models -e MODEL=${MODEL} -e NUM_GPUS=${NUM_GPUS} moyix/model_converter:latest
+# For some of the models we can download them preconverted.
+if [ $NUM_GPUS -le 2 ]; then
+    echo "Downloading the model from HuggingFace, this will take a while..."
+    SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+    DEST="${MODEL}-${NUM_GPUS}gpu"
+    ARCHIVE="${MODEL_DIR}/${DEST}.tar.zst"
+    cp -r "$SCRIPT_DIR"/converter/models/"$DEST" "${MODEL_DIR}"
+    curl -L "https://huggingface.co/moyix/${MODEL}-gptj/resolve/main/${MODEL}-${NUM_GPUS}gpu.tar.zst" \
+        -o "$ARCHIVE"
+    zstd -dc "$ARCHIVE" | tar -xf - -C "${MODEL_DIR}"
+    rm -f "$ARCHIVE"
+else
+    echo "Downloading and converting the model, this will take a while..."
+    docker run --rm -v ${MODEL_DIR}:/models -e MODEL=${MODEL} -e NUM_GPUS=${NUM_GPUS} moyix/model_converter:latest
+fi
 echo "Done! Now run ./launch.sh to start the FauxPilot server."