CodeGeeX/scripts/evaluate_humaneval_x.sh

# This script is for evaluating the functional correctness of the generated codes of HumanEval-X.

INPUT_FILE=$1  # Path to the .jsonl file that contains the generated codes.
LANGUAGE=$2  # Target programming language, currently support one of ["python", "java", "cpp", "js", "go"]
N_WORKERS=$3  # Number of parallel workers.
TIMEOUT=$4  # Timeout in seconds.

SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "$SCRIPT_PATH")
MAIN_DIR=$(dirname "$SCRIPT_DIR")

echo "$INPUT_FILE"

if [ -z "$N_WORKERS" ]
then
    N_WORKERS=64
fi

if [ -z "$LANGUAGE" ]
then
    LANGUAGE=python
fi

if [ -z "$TIMEOUT" ]
then
    TIMEOUT=5
fi

DATA_DIR=$MAIN_DIR/codegeex/benchmark/humaneval-x/$LANGUAGE/data/humaneval_$LANGUAGE.jsonl.gz

if [ $LANGUAGE = go ]; then
  export PATH=$PATH:/usr/local/go/bin
fi

if [ $LANGUAGE = cpp ]; then
  export PATH=$PATH:/usr/bin/openssl
fi

CMD="python $MAIN_DIR/codegeex/benchmark/humaneval-x/evaluate_humaneval_x.py \
    --input_file "$INPUT_FILE" \
    --n_workers $N_WORKERS \
    --tmp_dir $MAIN_DIR/codegeex/benchmark/humaneval-x/ \
    --problem_file $DATA_DIR \
    --timeout $TIMEOUT"

echo "$CMD"
eval "$CMD"