#!/usr/bin/env bash
# Copyright 2022 Ververica Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

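# This file has the following tasks
# a) It computes the available time budget from the pipeline start time and the job timeout
#    (SYSTEM_PIPELINESTARTTIME and SYSTEM_JOBTIMEOUT)
# b) It prints a warning and dumps stack traces once the test has used 95% of its execution time
# c) Shortly before the end of the execution time (KILL_SECONDS_BEFORE_TIMEOUT), it dumps stack traces
#    again and kills the test -- presumably so that the collected output can still be uploaded
#    as azure artifacts by a later step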

# The command to supervise, flattened into a single space-separated string.
# (Identical to the former `COMMAND=$@` under the default IFS.)
COMMAND="$*"

# Absolute, normalized directory this script lives in.
HERE="$(cd -- "$(dirname -- "$0")" && pwd)"
if [[ -z "$HERE" ]]; then
  echo "FAILURE: cannot determine the directory of '$0'." >&2
  exit 1
fi

source "${HERE}/../ci/controller_utils.sh"

# NOTE: this path is relative to the current working directory, not to $HERE.
source ./tools/azure-pipelines/debug_files_utils.sh
prepare_debug_files "$AGENT_JOBNAME"
export FLINK_LOG_DIR="$DEBUG_FILES_OUTPUT_DIR/flink-logs"
# Quoted so that a path containing whitespace or glob characters is not split/expanded.
mkdir "$FLINK_LOG_DIR" || { echo "FAILURE: cannot create log directory '${FLINK_LOG_DIR}'." ; exit 1; }
# moreutils provides `ts`, which is used to timestamp the command output.
sudo apt-get install -y moreutils

# Time budget: from "now" until pipeline start + job timeout (both provided via the environment).
REAL_START_SECONDS=$(date +"%s")
REAL_END_SECONDS=$(date -d "$SYSTEM_PIPELINESTARTTIME + $SYSTEM_JOBTIMEOUT minutes" +"%s")
REAL_TIMEOUT_SECONDS=$((REAL_END_SECONDS - REAL_START_SECONDS))
# The watchdog kills the command this many seconds before the deadline.
KILL_SECONDS_BEFORE_TIMEOUT=$((2 * 60))

echo "Running command '$COMMAND' with a timeout of $((REAL_TIMEOUT_SECONDS / 60)) minutes."

# File through which the PID of the supervised command is handed to the watchdog.
MAIN_PID_FILE="/tmp/uploading_watchdog_main.pid"

#######################################
# Watchdog meant to run in the background next to the supervised command.
# After 95% of the time budget it prints a warning and a first set of stack
# traces; KILL_SECONDS_BEFORE_TIMEOUT seconds before the deadline it prints a
# second set of stack traces and kills the supervised command.
# Globals (read): REAL_TIMEOUT_SECONDS, REAL_END_SECONDS,
#                 KILL_SECONDS_BEFORE_TIMEOUT, DEBUG_FILES_OUTPUT_DIR,
#                 MAIN_PID_FILE
# Outputs: warnings and stack traces on stdout; stack traces are also written
#          to $DEBUG_FILES_OUTPUT_DIR/jps-traces.0 and jps-traces.1
# Exits:   42 (terminates the shell it runs in) after the kill phase
#######################################
function timeout_watchdog() {
  # 95%
  sleep "$((REAL_TIMEOUT_SECONDS * 95 / 100))"
  echo "=========================================================================================="
  echo "=== WARNING: This task took already 95% of the available time budget of $((REAL_TIMEOUT_SECONDS / 60)) minutes ==="
  echo "=========================================================================================="
  print_stacktraces | tee "$DEBUG_FILES_OUTPUT_DIR/jps-traces.0"

  # final stack trace and kill processes KILL_SECONDS_BEFORE_TIMEOUT seconds before timeout
  local secondsToKill=$((REAL_END_SECONDS - $(date +"%s") - KILL_SECONDS_BEFORE_TIMEOUT))
  if ((secondsToKill < 0)); then
    # already past the kill point: do not wait any longer
    secondsToKill=0
  fi
  # Must be a variable expansion; `$(secondsToKill)` would try to run a command
  # of that name, leaving `sleep` without an operand so it returns immediately.
  sleep "$secondsToKill"
  print_stacktraces | tee "$DEBUG_FILES_OUTPUT_DIR/jps-traces.1"

  echo "============================="
  echo "=== WARNING: Killing task ==="
  echo "============================="
  local mainPid=""
  if [[ -r "$MAIN_PID_FILE" ]]; then
    mainPid=$(<"$MAIN_PID_FILE")
  fi
  if [[ -n "$mainPid" ]]; then
    pkill -P "$mainPid" # kill descendants
    kill "$mainPid"     # kill process itself
  else
    echo "WARNING: no PID found in '$MAIN_PID_FILE', nothing to kill." >&2
  fi

  exit 42
}

# Start the watchdog next to the supervised command and remember its PID.
timeout_watchdog &
WATCHDOG_PID=$!

# ts from moreutils prepends the time to each line
# $COMMAND is intentionally left unquoted: it is a flattened string that has to
# be word-split back into the program name and its arguments.
# The command's PID is written to $MAIN_PID_FILE so the watchdog can kill it.
( $COMMAND & PID=$! ; echo $PID >"$MAIN_PID_FILE" ; wait $PID ) | ts | tee "$DEBUG_FILES_OUTPUT_DIR/watchdog"
# exit status of the first pipeline stage, i.e. of the supervised command
TEST_EXIT_CODE=${PIPESTATUS[0]}

# The command is done, so the watchdog must not outlive it in any case: a
# lingering watchdog would later signal whatever PID is stored in the pid file.
# Errors are silenced on purpose; the watchdog may already have exited by itself.
kill "$WATCHDOG_PID" 2>/dev/null

# successful execution, cleanup watchdog related files (kept on failure for debugging)
if [[ "$TEST_EXIT_CODE" == 0 ]]; then
  rm "$DEBUG_FILES_OUTPUT_DIR/watchdog"
  rm -f "$DEBUG_FILES_OUTPUT_DIR"/jps-traces.*
fi

# properly forward exit code
exit "$TEST_EXIT_CODE"