2019-11-15 05:34:47 +01:00
#!/usr/bin/env bash
2021-07-16 14:15:31 +01:00
# SPDX-License-Identifier: MIT
2024-10-05 16:41:00 +02:00
# shellcheck disable=SC2016,SC2059
2019-11-15 05:34:47 +01:00
2020-06-09 15:03:35 +01:00
# This script is executed by GitHub Actions for every pull request opened.
2023-11-11 19:35:12 +01:00
# It currently accomplishes the following objectives:
2019-11-15 05:34:47 +01:00
#
# 1. Detect pages that were just copied (e.g. `cp pages/{common,linux}/7z.md`).
2024-01-06 19:13:20 +01:00
# 2. Detect English pages that were added in a platform specific directory although
2019-11-15 05:34:47 +01:00
# they already exist under 'common'.
2024-01-06 19:13:20 +01:00
# 3. Detect translated pages that do not exist as English pages yet.
2023-11-14 16:36:02 +01:00
# 4. Detect outdated pages. A page is marked as outdated when the number of
# commands differs from the number of commands in the English page or the
# contents of the commands differ from the English page.
# 5. Detect other miscellaneous anomalies in the pages folder.
2019-11-15 05:34:47 +01:00
#
# Results are printed to stdout, logs and errors to stderr.
#
# NOTE: must be run from the repository root directory to work correctly!
# NOTE: no `set -e`, failure of this script should not invalidate the build.
2024-11-02 22:39:21 +01:00
# Parse the command-line options.
#   -v  verbose mode: trace every executed command into debug.log.
VERBOSE=false

while getopts ":v" opt; do
  case $opt in
    v)
      VERBOSE=true
      ;;
    *)
      # Errors go to stderr; stdout is reserved for the check results.
      echo "This argument is not valid for this script." >&2
      ;;
  esac
done

if [[ $VERBOSE == true ]]; then
  DEBUG_LOG="debug.log"
  # Start every verbose run with a fresh, empty log file.
  rm -f "$DEBUG_LOG" && touch "$DEBUG_LOG"
  # Let bash allocate a file descriptor for the log and use it as the xtrace
  # target, so that the trace produced by `set -x` ends up in the log file.
  exec {BASH_XTRACEFD}>"$DEBUG_LOG"
  export BASH_XTRACEFD
  set -x
fi
2019-11-15 05:34:47 +01:00
# Report an English page that was added to a platform directory although a page
# with the same file name already exists under 'pages/common'.
#
# Arguments: $1 - page path in the format 'pages<.language_code>/platform/pagename.md'
# Globals:   MSG_EXISTS (read, printf format)
# Outputs:   one list item on stdout when the page duplicates a common page
# Returns:   1 when the page is not an English page
function check_duplicates {
  local page="$1"
  local -a segments
  readarray -td'/' segments < <(echo -n "$page")

  # Duplicates are only looked for among the English pages.
  [[ ${segments[0]} == "pages" ]] || return 1

  local platform="${segments[1]}" file="${segments[2]}"

  # A page that already lives in 'common' has nothing to be compared with.
  if [[ $platform == "common" ]]; then
    return 0
  fi

  if [[ -f "pages/common/$file" ]]; then
    printf "\x2d $MSG_EXISTS" "$page" 'common'
  fi
}
2023-11-12 10:48:15 +01:00
# Report a translated page whose English counterpart does not exist.
#
# Arguments: $1 - page path ('pages<.language_code>/platform/pagename.md')
# Globals:   MSG_NOT_EXISTS (read, printf format)
# Outputs:   one list item on stdout when the English page is missing
# Returns:   1 when the page is itself an English page
function check_missing_english_page() {
  local page="$1"
  # Drop the leading 'pages<.language_code>/' folder to get 'platform/pagename.md'.
  local relative_path="${page#pages*/}"
  local english_page="pages/${relative_path}"

  # An English page is its own reference, there is nothing to look up.
  [[ $page != "$english_page" ]] || return 1

  if ! [[ -f $english_page ]]; then
    printf "\x2d $MSG_NOT_EXISTS" "$page" "$english_page"
  fi
}
2023-12-27 07:36:40 +01:00
# Count the lines of a file that match a regular expression.
#
# Arguments: $1 - path of the file to inspect
#            $2 - grep regular expression selecting the lines to count
# Outputs:   the number of matching lines on stdout
# Returns:   the exit status of grep (non-zero when no line matched)
function count_commands() {
  local file="$1"
  local regex="$2"

  # '-e' and '--' keep a pattern or a path that starts with '-' from being
  # parsed as a grep option.
  grep -c -e "$regex" -- "$file"
}
# Print the matching lines of a file with their variable parts removed, joined
# into a single line.
#
# The following substitutions are applied to every matching line, in order:
#   {{...}}  -> {{}}      "..."  -> ""
#   <...>    -> removed   '...'  -> removed
#   (...)    -> removed   `      -> removed
#
# Arguments: $1 - path of the file to inspect
#            $2 - grep regular expression selecting the lines to process
# Outputs:   the stripped lines on stdout, joined into one line
function strip_commands() {
  local file="$1"
  local regex="$2"
  local stripped_commands=()

  # One sed process runs all substitutions per line; '-e'/'--' protect grep
  # from a pattern or a path that starts with '-'.
  mapfile -t stripped_commands < <(
    grep -e "$regex" -- "$file" |
      sed \
        -e 's/{{[^}]*}}/{{}}/g' \
        -e 's/<[^>]*>//g' \
        -e 's/([^)]*)//g' \
        -e 's/"[^"]*"/""/g' \
        -e "s/'[^']*'//g" \
        -e 's/`//g'
  )

  # "[*]" joins the lines with the first character of IFS (a space by default).
  printf "%s\n" "${stripped_commands[*]}"
}
2023-11-14 16:36:02 +01:00
# Report a translated page whose commands no longer match the English page.
#
# A page counts as outdated when its number of command lines differs from the
# English page or, with equal counts, when the stripped command contents differ.
#
# Arguments: $1 - page path ('pages<.language_code>/platform/pagename.md')
# Globals:   MSG_OUTDATED (read, printf format)
# Outputs:   one list item on stdout when the page is outdated
# Returns:   1 when the page is an English page or has no English counterpart
function check_outdated_page() {
  local page="$1"
  local english_page="pages/${page#pages*/}"
  # A command line starts and ends with a backtick, with no backtick in between.
  local command_regex='^`[^`]\+`$'

  [[ $page != "$english_page" ]] || return 1
  [[ -f $english_page ]] || return 1

  local english_count translated_count english_text translated_text
  english_count="$(count_commands "$english_page" "$command_regex")"
  translated_count="$(count_commands "$page" "$command_regex")"
  english_text="$(strip_commands "$english_page" "$command_regex")"
  translated_text="$(strip_commands "$page" "$command_regex")"

  # The command count is compared first; contents only matter on equal counts.
  local reason=""
  if [[ $english_count != "$translated_count" ]]; then
    reason="based on number of commands"
  elif [[ $english_text != "$translated_text" ]]; then
    reason="based on the command contents itself"
  fi

  if [[ -n $reason ]]; then
    printf "\x2d $MSG_OUTDATED" "$page" "$reason"
  fi
}
2024-09-03 07:43:49 +02:00
# Report a page whose path is listed in more-info-links.txt, the file that
# check_diff fills with the output of scripts/set-more-info-link.py.
#
# Arguments: $1 - page path
# Globals:   MSG_MORE_INFO (read, printf format)
# Outputs:   one list item on stdout when the page is listed
function check_more_info_link() {
  local page=$1

  # Match the path as a fixed string: as a regular expression its '.' would
  # match any character and a '[' in a page name would make grep fail.
  if grep -qF -- "$page" "more-info-links.txt"; then
    printf "\x2d $MSG_MORE_INFO" "$page"
  fi
}
2024-09-10 19:21:59 +02:00
# Report a page whose path is listed in page-titles.txt, the file that
# check_diff fills with the output of scripts/set-page-title.py.
#
# Arguments: $1 - page path
# Globals:   MSG_PAGE_TITLE (read, printf format)
# Outputs:   one list item on stdout when the page is listed
function check_page_title() {
  local page=$1

  # Match the path as a fixed string: as a regular expression its '.' would
  # match any character and a '[' in a page name would make grep fail.
  if grep -qF -- "$page" "page-titles.txt"; then
    printf "\x2d $MSG_PAGE_TITLE" "$page"
  fi
}
2019-11-15 05:34:47 +01:00
# Inspect the pages changed relative to origin/main and run the page checks.
#
# The added, copied and modified paths under pages*/ are taken from
# `git diff --name-status` and handled by their status:
#   C<score>  the page is reported as a copy of another page
#   A         all page checks are run, including the duplicate check
#   M         all page checks are run, except the duplicate check
#
# Globals: MSG_IS_COPY (read, printf format)
# Outputs: findings on stdout, log messages on stderr
# Returns: 0 when the diff is empty
function check_diff {
  local git_diff
  local line
  local entry
  local change file1 file2 percentage

  git_diff="$(git diff --name-status --find-copies-harder --diff-filter=ACM origin/main -- pages*/)"

  if [[ -z $git_diff ]]; then
    echo 'Check PR: git diff looks fine, no interesting changes detected.' >&2
    return 0
  fi
  echo -e "Check PR: git diff:\n$git_diff" >&2

  # These two files are read by check_more_info_link and check_page_title.
  python3 scripts/set-more-info-link.py -Sn > more-info-links.txt
  python3 scripts/set-page-title.py -Sn > page-titles.txt

  while read -r line; do
    # Tab-separated fields: status, path and (for copies) the path of the copy.
    readarray -td$'\t' entry < <(echo -n "$line")
    change="${entry[0]}"
    file1="${entry[1]}"
    file2="${entry[2]}"

    case "$change" in
      C*) # file2 is a copy of file1
        # Strip the 'C' and up to two leading zeros from the similarity score,
        # presumably so that the %d in MSG_IS_COPY does not read it as octal.
        percentage=${change#C}
        percentage=${percentage#0}
        percentage=${percentage#0}
        printf "\x2d $MSG_IS_COPY" "$file2" "$file1" "$percentage"
        ;;
      A | M) # file1 was newly added (A) or modified (M)
        # Only a newly added page is checked against the pages under 'common'.
        if [[ $change == A ]]; then
          check_duplicates "$file1"
        fi
        check_missing_english_page "$file1"
        check_outdated_page "$file1"
        check_more_info_link "$file1"
        check_page_title "$file1"
        ;;
    esac
  done <<< "$git_diff"

  rm more-info-links.txt page-titles.txt
}
# Check the layout of the pages/ folder for anomalies.
#
# Every name listed in PLATFORMS must be a directory under pages/, and every
# entry directly inside such a directory must be a regular file whose name ends
# in '.md'. Deeper levels are not visited.
#
# Globals: PLATFORMS (read, whitespace-separated names)
#          MSG_NOT_DIR, MSG_NOT_FILE, MSG_NOT_MD (read, printf formats)
# Outputs: one list item per anomaly on stdout
function check_structure {
  # Keep the loop variables out of the global scope.
  local platform page

  # PLATFORMS is left unquoted on purpose: it is split into one name per word.
  for platform in $PLATFORMS; do
    if [[ ! -d "pages/$platform" ]]; then
      printf "\x2d $MSG_NOT_DIR" "pages/$platform"
      continue
    fi

    for page in "pages/$platform"/*; do
      # In an empty directory the glob stays unexpanded; there is nothing to
      # check then. Broken symlinks (-L) are still inspected below.
      if [[ ! -e $page && ! -L $page ]]; then
        continue
      fi

      if [[ ! -f $page ]]; then
        printf "\x2d $MSG_NOT_FILE" "$page"
      elif [[ ${page:(-3)} != ".md" ]]; then
        printf "\x2d $MSG_NOT_MD" "$page"
      fi
    done
  done
}
###################################
# MAIN
###################################
2024-01-06 19:13:20 +01:00
# Message templates. The check functions pass them to printf as part of the
# format string, each prefixed with a dash to form a list item.
MSG_EXISTS='The page `%s` already exists in the `%s` directory.\n'
MSG_NOT_EXISTS='The page `%s` does not exists as English page `%s` yet.\n'
MSG_OUTDATED='The page `%s` is outdated, %s, compared to the English page.\n'
MSG_IS_COPY='The page `%s` seems to be a copy of `%s` (%d%% matching).\n'
MSG_NOT_DIR='The file `%s` does not look like a directory.\n'
MSG_NOT_FILE='The file `%s` does not look like a regular file.\n'
MSG_NOT_MD='The file `%s` does not have a `.md` extension.\n'
MSG_MORE_INFO='The page `%s` has a more info link that does not match the one in the English page. Please check the "More information:" translation as well using https://github.com/tldr-pages/tldr/blob/main/contributing-guides/translation-templates/more-info-link.md.\n'
MSG_PAGE_TITLE='The page `%s` has a page title that does not match the one in the English page.\n'

# Names of the entries directly under pages/, collected with a glob instead of
# parsing the output of `ls`. The list stays a whitespace-separated string
# because check_structure splits it into words.
PLATFORMS=''
for platform_dir in pages/*; do
  # Skip the unexpanded pattern left behind when pages/ is missing or empty.
  [[ -e $platform_dir || -L $platform_dir ]] || continue
  PLATFORMS+="${platform_dir#pages/} "
done
unset platform_dir

# Run only for pull requests of the tldr-pages/tldr repository on CI.
if [[ $CI != true || $GITHUB_REPOSITORY != "tldr-pages/tldr" || -z $PULL_REQUEST_ID ]]; then
  echo 'Not a pull request, refusing to run.' >&2
  exit 0
fi

check_diff
check_structure