# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# CodeGeeX/scripts/convert_ckpt_parallel.sh
#
# 37 lines
# 1.0 KiB
# Bash

# This script is used to convert checkpoint model parallel partitions.
#
# Usage: convert_ckpt_parallel.sh LOAD_CKPT_PATH SAVE_CKPT_PATH [MP_SIZE]
#   LOAD_CKPT_PATH  Path to weights in .pt format.
#   SAVE_CKPT_PATH  Path to save the output MP checkpoints.
#   MP_SIZE         Model parallel size (defaults to 1 when omitted or empty).
#
# Exit status: 2 on bad arguments, otherwise the status of the converter.
set -euo pipefail

LOAD_CKPT_PATH=${1:-} # Path to weights in .pt format.
SAVE_CKPT_PATH=${2:-} # Path to save the output MP checkpoints.
MP_SIZE=${3:-1}       # Model parallel size

# Fail early with a usage message instead of handing the converter a
# malformed command line when a required path is missing.
if [[ -z "$LOAD_CKPT_PATH" || -z "$SAVE_CKPT_PATH" ]]; then
  printf 'Usage: %s LOAD_CKPT_PATH SAVE_CKPT_PATH [MP_SIZE]\n' "${0##*/}" >&2
  exit 2
fi

if ! [[ "$MP_SIZE" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: MP_SIZE must be a positive integer, got: %s\n' "$MP_SIZE" >&2
  exit 2
fi

# Resolve the repository root relative to this script: <root>/scripts/<this file>.
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "$SCRIPT_PATH")
MAIN_DIR=$(dirname "$SCRIPT_DIR")
TOKENIZER_PATH="$MAIN_DIR/codegeex/tokenizer/"

# export CUDA settings
# NOTE: these unconditionally override any values already set in the caller's
# environment; edit them here to match the local CUDA install / GPU layout.
export CUDA_HOME=/usr/local/cuda-11.1/
export CUDA_VISIBLE_DEVICES=0,1

# Build the command as an array so every argument is passed to python exactly
# as given (paths with spaces or shell metacharacters are neither split nor
# re-interpreted, which the previous string + eval approach could not ensure).
# The model-shape flags below are fixed values forwarded to the converter.
CMD=(
  python "$MAIN_DIR/codegeex/megatron/convert_ckpt_parallel.py"
  --load-ckpt-path "$LOAD_CKPT_PATH"
  --save-ckpt-path "$SAVE_CKPT_PATH"
  --tokenizer-path "$TOKENIZER_PATH"
  --target-tensor-model-parallel-size "$MP_SIZE"
  --num-layers 39
  --hidden-size 5120
  --num-attention-heads 40
  --max-position-embeddings 2048
  --attention-softmax-in-fp32
  --fp16
  --micro-batch-size 1
  --make-vocab-size-divisible-by 52224
  --seq-length 2048
)

# Log the command in a shell-quoted form that can be copy-pasted safely.
printf '%q ' "${CMD[@]}"
printf '\n'

"${CMD[@]}"