471 lines
14 KiB
Bash
Executable file
471 lines
14 KiB
Bash
Executable file
#!/bin/bash
|
|
|
|
# Copyright (C) 2020 Kevin R Croft <krcroft@gmail.com>
|
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
##
|
|
# This script craws the current repo's GitHub workflow content.
|
|
# It fetches assets and logs from the most recent successful master
|
|
# run followed by the most recent (and possibly failing) current
|
|
# branch, which can also be a master branch.
|
|
# The goal of this script is two fold:
|
|
# - Provide a mechanized an automated way to fetch CI records.
|
|
# - Provide a rapid way to diff bad CI runs against master.
|
|
#
|
|
# This script requires a GitHub account in order to generate an
|
|
# auth-token. Simply run the script, it will provide instructions.
|
|
#
|
|
set -euo pipefail
|
|
shopt -s nullglob
|
|
|
|
# Fixed portion of the URL
|
|
declare -gr scheme="https://"
|
|
declare -gr authority="api.github.com"
|
|
|
|
# Colors
|
|
declare -gr bold="\\e[1m"
|
|
declare -gr red="\\e[31m"
|
|
declare -gr green="\\e[32m"
|
|
declare -gr yellow="\\e[33m"
|
|
declare -gr cyan="\\e[36m"
|
|
declare -gr reset="\\e[0m"
|
|
|
|
##
|
|
# Changes the working directory to that of the
|
|
# repository's root.
|
|
#
|
|
function cd_repo_root() {
|
|
if [[ "${in_root:-}" != "true" ]]; then
|
|
script_path="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
|
|
pushd "$script_path" > /dev/null
|
|
pushd "$(git rev-parse --show-toplevel)" > /dev/null
|
|
in_root=true
|
|
fi
|
|
}
|
|
|
|
##
|
|
# Determines the full GitHub repo name using the
|
|
# remote origin set in the repository's configuration
|
|
#
|
|
function init_baseurl() {
|
|
cd_repo_root
|
|
# Extract the full GitHub repo name from the origin
|
|
repo="$(git config --get remote.origin.url | sed 's/.*://;s/\.git$//;s^//[^/]*/^^')"
|
|
baseurl="$scheme/$authority/repos/$repo/actions"
|
|
declare -gr repo
|
|
declare -gr baseurl
|
|
}
|
|
|
|
##
|
|
# Determines the local branch name
|
|
#
|
|
function init_local_branch() {
|
|
cd_repo_root
|
|
local_branch="$(git rev-parse --abbrev-ref HEAD)"
|
|
declare -gr local_branch
|
|
}
|
|
|
|
##
|
|
# Sets up NETRC credentials for GitHub's v3 API server.
|
|
# NETRC is currently the most secure way to provide
|
|
# credentials to CURL, because it prevents other processes
|
|
# from inspect the process environment and cmd arguments;
|
|
# both of which can be found in /proc).
|
|
#
|
|
function init_netrc() {
|
|
# Check and setup the GitHub personal access token
|
|
local netrc="$HOME/.netrc"
|
|
if grep -q "^machine $authority" "$netrc"; then
|
|
echo "Found credentials for $authority in $netrc"
|
|
return
|
|
fi
|
|
# Get the username
|
|
clear
|
|
echo ""
|
|
echo "1. Enter your GitHub account email address, example: jcousteau@scuba.fr"
|
|
echo ""
|
|
read -r -p "GitHub account email: " username
|
|
|
|
# Get the token
|
|
clear
|
|
echo ""
|
|
echo "2. Login to GitHub and visit https://github.com/settings/tokens"
|
|
echo ""
|
|
echo "3. Click 'Generate new token' and set the 'public_repo' permission:"
|
|
echo ""
|
|
echo " [ ] repo Full control of private repos"
|
|
echo " [ ] repo:status Access commit status"
|
|
echo " [ ] repo_deployment Access deployment status"
|
|
echo " [X] public_repo Access public repositories"
|
|
echo " [ ] repo:invite Access repository invitations"
|
|
echo ""
|
|
echo " Type a name for the token then scroll down and click 'Generate Token'."
|
|
echo ""
|
|
echo "4. Copy & paste your token, for example: f7e6b2344bd2c1487597b61d77703527a692a072"
|
|
echo ""
|
|
# Deliberately echo the token so the user can verify its correctness
|
|
read -r -p "Personal Access Token: " token
|
|
|
|
# Add the credential to netrc
|
|
clear
|
|
if [[ -n "${username:-}" && -n "${token:-}" ]]; then
|
|
{
|
|
echo "machine $authority"
|
|
echo "login $username"
|
|
echo "password $token"
|
|
echo ""
|
|
} >> "$netrc"
|
|
echo "Added your credentials to $netrc"
|
|
# Ensure netrc is only readable by the user
|
|
chmod 600 "$netrc"
|
|
else
|
|
echo "Failed to setup $netrc or some of the credentials were empty!"
|
|
exit 1
|
|
fi
|
|
}
|
|
|
|
##
|
|
# Makes strings suitable for directory and filenames
|
|
# - spaces => underscores
|
|
# - upper-case => lower-case
|
|
# - slashes => dashes
|
|
# - parenthesis => stripped
|
|
# - equals => dashes
|
|
#
|
|
function sanitize_name() {
|
|
local name="$1"
|
|
echo "$name" | sed 's/\(.*\)/\L\1/;s/ /_/g;s^/^-^g;s/[()]//g;s/=/-/g'
|
|
}
|
|
|
|
##
|
|
# Returns how old a file or directory is, in seconds.
|
|
#
|
|
function seconds_old() {
|
|
echo $(( "$(date +%s)" - "$(stat -L --format %Y "$1")" ))
|
|
}
|
|
|
|
##
|
|
# Creates a storage area for all content fetched by the script.
|
|
# This include cached JSON output (valid for 5 minutes), along
|
|
# with zip assets, log files, and diffs.
|
|
#
|
|
declare -g parent # used by the trap
|
|
function init_dirs() {
|
|
local repo_postfix
|
|
repo_postfix="$(basename "$repo")"
|
|
parent="/tmp/$repo_postfix-workflows-$USER"
|
|
assets_dir="$parent/assets"
|
|
cache_dir="$parent/cache"
|
|
diffs_dir="$parent/diffs"
|
|
logs_dir="$parent/logs"
|
|
declare -gr assets_dir
|
|
declare -gr cache_dir
|
|
declare -gr diffs_dir
|
|
declare -gr logs_dir
|
|
echo "Initializing storage area: $parent"
|
|
|
|
# Don't trust content from a prior interrupted run
|
|
if [[ -f "$parent/.interrupted" ]]; then
|
|
rm -rf "$parent"
|
|
fi
|
|
|
|
# Make the directories if they don't exist
|
|
for dir in "$assets_dir" "$cache_dir" "$diffs_dir" "$logs_dir"; do
|
|
if [[ ! -d "$dir" ]]; then
|
|
mkdir -p "$dir"
|
|
# Otherwise, purge content older than 5-minutes
|
|
else
|
|
for filename in "$dir"/*; do
|
|
if [[ "$(seconds_old "$filename")" -gt 300 ]]; then
|
|
rm -rf "$filename"
|
|
fi
|
|
done
|
|
fi
|
|
done
|
|
# If the user Ctrl-C'd the job, then some files might be
|
|
# partially written, so drop a breadcrumb to clean up next run.
|
|
# (we could just blow away the content here, but we want to
|
|
# let the user inspect content after interrupting the run.)
|
|
trap 'touch $parent/.interrupted' INT
|
|
}
|
|
|
|
##
|
|
# Ensures all pre-requisites are setup and have passed
|
|
# before we start making REST queries and writing files.
|
|
#
|
|
function init() {
|
|
init_baseurl
|
|
init_local_branch
|
|
init_netrc
|
|
init_dirs
|
|
}
|
|
|
|
##
|
|
# Downloads a file if we otherwise don't have it.
|
|
# (Note that the script on launch cleans up files older than
|
|
# 5 minutes, so most of the time we'll be downloading.)
|
|
#
|
|
function download() {
|
|
local url="$1"
|
|
local outfile="$2"
|
|
if [[ ! -f "$outfile" ]]; then
|
|
curl --silent \
|
|
--location \
|
|
--netrc \
|
|
"$url" \
|
|
-o "$outfile"
|
|
fi
|
|
}
|
|
|
|
##
|
|
# Unzips files inside their containing directory.
|
|
# Clobbers existing files.
|
|
#
|
|
function unpack() {
|
|
local zipfile="$1"
|
|
local zipdir
|
|
zipdir="$(dirname "$zipfile")"
|
|
pushd "$zipdir" > /dev/null
|
|
unzip -qq -o "$zipfile"
|
|
rm -f "$zipfile"
|
|
popd > /dev/null
|
|
}
|
|
|
|
##
|
|
# Constructs and fetches REST urls using our personal access
|
|
# token. Files are additionally hashed based on the REST URL
|
|
# and cached. This allows for rapid-rerunning without needing
|
|
# to hit GitHub's API again (for duplicate requests). This
|
|
# avoid us exceeding our repo limit on API calls/day.
|
|
#
|
|
function pull() {
|
|
# Buildup the REST URL by appending arguments
|
|
local url="$baseurl"
|
|
for element in "$@"; do
|
|
url="$url/$element"
|
|
done
|
|
local url_hash
|
|
url_hash="$(echo "$url" | md5sum | cut -f1 -d' ')"
|
|
local outfile="${cache_dir}/${url_hash}.json"
|
|
if [[ ! -f "$outfile" ]]; then
|
|
download "$url" "$outfile"
|
|
fi
|
|
cat "$outfile"
|
|
}
|
|
|
|
##
|
|
# Gets one or more keys from all records
|
|
#
|
|
function get_all() {
|
|
local container="$1"
|
|
local return_keys="$2"
|
|
jq -r '.'"$container"'[] | '"${return_keys}"
|
|
}
|
|
|
|
##
|
|
# Gets one or more return_key(s) from records that have
|
|
# matching search_key and search_value hits
|
|
#
|
|
function query() {
|
|
local container="$1"
|
|
local search_key="$2"
|
|
local search_value="$3"
|
|
local return_keys="$4"
|
|
jq -r --arg value "$search_value"\
|
|
'.'"${container}"'[] | if .'"${search_key}"' == $value then '"${return_keys}"' else empty end'
|
|
}
|
|
|
|
##
|
|
# Pulls the subset of active workflows from GitHub having
|
|
# path values that match the local repos filenames inside
|
|
# .github/workflows (otherwise there are 30+ workflows).
|
|
#
|
|
# The workflow numeric ID and textual name are stored
|
|
# in an associated array, respectively.
|
|
#
|
|
# API References:
|
|
# - https://developer.github.com/v3/actions/workflows/
|
|
# - GET /repos/:owner/:repo/actions/workflows
|
|
#
|
|
function fetch_workflows() {
|
|
unset workflows
|
|
declare -gA workflows
|
|
for workflow_path in ".github/workflows/"*.yml; do
|
|
local result
|
|
result="$(pull workflows \
|
|
| query workflows path "$workflow_path" '.id,.name')"
|
|
local id
|
|
id="${result%$'\n'*}"
|
|
local name
|
|
name="${result#*$'\n'}"
|
|
|
|
# Skip empty values and a couple master-only workflows
|
|
if [[ -z "${id:-}" \
|
|
|| -z "${name:-}" \
|
|
|| "$name" == "Config heavy" \
|
|
|| "$name" == "Coverity Scan" ]]; then
|
|
continue
|
|
fi
|
|
workflows["$id"]="$(sanitize_name "$name")"
|
|
done
|
|
}
|
|
|
|
##
|
|
# Fetches the first run identifier for a given workflow ID
|
|
# and branch name. The run ID is stored in the run_id variable.
|
|
#
|
|
# API References:
|
|
# - https://developer.github.com/v3/actions/workflow_runs
|
|
# - GET /repos/:owner/:repo/actions/runs/:run_id
|
|
#
|
|
function fetch_workflow_run() {
|
|
declare -g run_id
|
|
local workflow_id="$1"
|
|
local branch="$2"
|
|
# GET /repos/:owner/:repo/actions/workflows/:workflow_id/runs
|
|
run_id="$(pull workflows "$workflow_id" runs \
|
|
| query workflow_runs head_branch "$branch" '.id' \
|
|
| head -1)"
|
|
}
|
|
|
|
##
|
|
# Fetches artifact names and download URLs for a given run ID,
|
|
# and stored them in an assiciative array, respectively.
|
|
#
|
|
# API References:
|
|
# - https://developer.github.com/v3/actions/artifacts
|
|
# - GET /repos/:owner/:repo/actions/runs/:run_id/artifacts
|
|
#
|
|
function fetch_run_artifacts() {
|
|
unset artifacts
|
|
declare -gA artifacts
|
|
while read -r name; do
|
|
read -r url
|
|
sanitized_name="$(sanitize_name "$name")"
|
|
artifacts["$sanitized_name"]="$url"
|
|
done < <(pull runs "$run_id" artifacts \
|
|
| get_all artifacts '.name,.archive_download_url')
|
|
}
|
|
|
|
##
|
|
# Fetches the job IDs and job names for a given run ID.
|
|
# The job IDs and names are stored in an associative array,
|
|
# respectively.
|
|
#
|
|
# API References:
|
|
# - https://developer.github.com/v3/actions/workflow_jobs
|
|
# - GET /repos/:owner/:repo/actions/runs/:run_id/jobs
|
|
#
|
|
function fetch_run_jobs() {
|
|
unset jobs_array
|
|
declare -gA jobs_array
|
|
local conclusion="$1" # success or failure
|
|
while read -r id; do
|
|
read -r name
|
|
jobs_array["$id"]="$(sanitize_name "$name")"
|
|
done < <(pull runs "$run_id" jobs \
|
|
| query jobs conclusion "$conclusion" '.id,.name')
|
|
}
|
|
|
|
##
|
|
# Fetches a job's log, and saves it in the provided output
|
|
# filename. The logs prefix time-stamps are filtered for easier
|
|
# text processing.
|
|
#
|
|
# API References:
|
|
# - https://developer.github.com/v3/actions/workflow_jobs
|
|
# - GET /repos/:owner/:repo/actions/jobs/:job_id/logs
|
|
#
|
|
function fetch_job_log() {
|
|
local jid="$1"
|
|
local outfile="$2"
|
|
pull jobs "$jid" logs \
|
|
| sed 's/^.*Z[ \t]*//;s/:[[:digit:]]*:[[:digit:]]*://;s/\[/./g;s/\]/./g' \
|
|
> "$outfile"
|
|
}
|
|
|
|
##
|
|
# Crawl workflows, runs, and jobs for the master and current branch.
|
|
# While crawling, download assets and logs, and if a run failed, diff
|
|
# that log against the last successful master-equivalent having the same
|
|
# workflow and job type.
|
|
#
|
|
# TODO - Refactor into smaller functions and trying to flatten the loop depth.
|
|
# TODO - Improve the log differ to something that can lift out just the
|
|
# gcc/clang/vistual-studio warnings and errors, and diff them.
|
|
#
|
|
function main() {
|
|
# Setup all pre-requisites
|
|
init
|
|
echo "Comparing branch $local_branch with master"
|
|
echo ""
|
|
|
|
# Fetch the workflows, to be used throughout the run
|
|
fetch_workflows
|
|
|
|
# Step through each workflow
|
|
for workflow_id in "${!workflows[@]}"; do
|
|
workflow_name="${workflows[$workflow_id]}"
|
|
echo -e "${bold}${workflow_name}${reset} workflow [$workflow_id]"
|
|
|
|
# Within the workflows, we're interested in finding the newest subset of
|
|
# runs that match our current branch as well as the master branch.
|
|
for branch in master current; do
|
|
if [[ "$branch" == "current" ]]; then
|
|
branch_name="$local_branch"
|
|
else
|
|
branch_name="master"
|
|
fi
|
|
|
|
# Get the run identifier for the given workflow and branch
|
|
fetch_workflow_run "$workflow_id" "$branch_name"
|
|
if [[ -z "${run_id:-}" ]]; then
|
|
echo " \`- no runs found for $branch_name"
|
|
continue
|
|
fi
|
|
[[ "$branch" == "master" ]] && joiner="|-" || joiner="\`-"
|
|
echo " $joiner found latest $branch_name run [$run_id]"
|
|
|
|
# Download the artifacts produced during the selected run
|
|
fetch_run_artifacts
|
|
[[ "$branch" == "master" ]] && joiner="|" || joiner=" "
|
|
for artifact_name in "${!artifacts[@]}"; do
|
|
artifact_url="${artifacts[$artifact_name]}"
|
|
asset_file="$assets_dir/$workflow_name-$artifact_name-$branch.zip"
|
|
download "$artifact_url" "$asset_file"
|
|
unpack "$asset_file"
|
|
echo -e " $joiner - unpacking ${cyan}${artifact_name}${reset} asset"
|
|
done
|
|
|
|
# Download the logs for the jobs within the selected run
|
|
for conclusion in failure success; do
|
|
if ! fetch_run_jobs "$conclusion"; then
|
|
echo " \`- skipped $job_id $conclusion"
|
|
continue
|
|
fi
|
|
[[ "$conclusion" == "success" ]] && color="${green}" || color="${red}"
|
|
for job_id in "${!jobs_array[@]}"; do
|
|
job_name="${jobs_array[$job_id]}"
|
|
echo -e " $joiner - fetching ${color}${job_name}${reset} ${conclusion} log"
|
|
log_file="$logs_dir/$workflow_name-$job_name-$branch-$conclusion.log"
|
|
successful_master_log="$logs_dir/$workflow_name-$job_name-master-success.log"
|
|
fetch_job_log "$job_id" "$log_file"
|
|
|
|
# In the event we've found a failed job, try to diff it against a prior
|
|
# successful master job of the equivalent workflow and job-type.
|
|
if [[ "$conclusion" == "failure"
|
|
&& -f "$log_file"
|
|
&& -f "$successful_master_log" ]]; then
|
|
sanitized_branch_name="$(sanitize_name "$branch_name")"
|
|
diff_file="$diffs_dir/$workflow_name-$job_name-$sanitized_branch_name-vs-master.log"
|
|
diff "$log_file" "$successful_master_log" > "$diff_file" || true
|
|
echo -e " - diffed ${yellow}$diff_file${reset}"
|
|
fi
|
|
done # jobs_array
|
|
done # conclusion types
|
|
echo " $joiner"
|
|
done # branch types
|
|
done # workflows
|
|
}
|
|
|
|
main
|