You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
CodeGeeX/scripts/evaluate_humaneval_x.sh

47 lines
1.1 KiB
Bash

# This script is for evaluating the functional correctness of the generated codes of HumanEval-X.
INPUT_FILE=$1 # Path to the .jsonl file that contains the generated codes.
LANGUAGE=$2 # Target programming language, currently support one of ["python", "java", "cpp", "js", "go"]
N_WORKERS=$3 # Number of parallel workers.
TIMEOUT=$4 # Timeout in seconds.
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "$SCRIPT_PATH")
MAIN_DIR=$(dirname "$SCRIPT_DIR")
echo "$INPUT_FILE"
if [ -z "$N_WORKERS" ]
then
N_WORKERS=64
fi
if [ -z "$LANGUAGE" ]
then
LANGUAGE=python
fi
if [ -z "$TIMEOUT" ]
then
TIMEOUT=5
fi
DATA_DIR=$MAIN_DIR/codegeex/benchmark/humaneval-x/$LANGUAGE/data/humaneval_$LANGUAGE.jsonl.gz
if [ $LANGUAGE = go ]; then
export PATH=$PATH:/usr/local/go/bin
fi
if [ $LANGUAGE = cpp ]; then
export PATH=$PATH:/usr/bin/openssl
fi
CMD="python $MAIN_DIR/codegeex/benchmark/humaneval-x/evaluate_humaneval_x.py \
--input_file "$INPUT_FILE" \
--n_workers $N_WORKERS \
--tmp_dir $MAIN_DIR/codegeex/benchmark/humaneval-x/ \
--problem_file $DATA_DIR \
--timeout $TIMEOUT"
echo "$CMD"
eval "$CMD"