mirror of https://github.com/THUDM/CodeGeeX.git
# This script is used to convert a checkpoint into model parallel partitions.
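# Usage (positional arguments; MP_SIZE is optional and defaults to 1):
#   bash <this_script>.sh LOAD_CKPT_PATH SAVE_CKPT_PATH [MP_SIZE]
# Example invocation with hypothetical paths, splitting the weights into 4 partitions:
#   bash <this_script>.sh ./codegeex_13b.pt ./codegeex_13b_mp4/ 4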
LOAD_CKPT_PATH=$1  # Path to weights in .pt format.
SAVE_CKPT_PATH=$2  # Path to save the output MP checkpoints.
MP_SIZE=$3         # Target model parallel size (optional, defaults to 1).

# Resolve the project directory (parent of this script's directory).
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "$SCRIPT_PATH")
MAIN_DIR=$(dirname "$SCRIPT_DIR")
TOKENIZER_PATH="$MAIN_DIR/codegeex/tokenizer/"

if [ -z "$MP_SIZE" ]; then
    MP_SIZE=1
fi

# Export CUDA settings (adjust CUDA_HOME and CUDA_VISIBLE_DEVICES for your machine).
export CUDA_HOME=/usr/local/cuda-11.1/
export CUDA_VISIBLE_DEVICES=0,1
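# Note: the model hyperparameters passed below (39 layers, hidden size 5120,
# 40 attention heads, vocabulary padded to 52224) are assumed to describe the
# released CodeGeeX-13B checkpoint; adjust them if converting a different model.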
CMD="python $MAIN_DIR/codegeex/megatron/convert_ckpt_parallel.py \
        --load-ckpt-path $LOAD_CKPT_PATH \
        --save-ckpt-path $SAVE_CKPT_PATH \
        --tokenizer-path $TOKENIZER_PATH \
        --target-tensor-model-parallel-size $MP_SIZE \
        --num-layers 39 \
        --hidden-size 5120 \
        --num-attention-heads 40 \
        --max-position-embeddings 2048 \
        --attention-softmax-in-fp32 \
        --fp16 \
        --micro-batch-size 1 \
        --make-vocab-size-divisible-by 52224 \
        --seq-length 2048"
echo "$CMD"
eval "$CMD" |