From 20bf72751c865e2597ca89e6acb5a8066d2e81f4 Mon Sep 17 00:00:00 2001 From: Gergely Kis Date: Mon, 5 Sep 2022 11:55:51 +0200 Subject: [PATCH] Fix UTF-8 validation in static checks Use isutf8 instead of recode to detect invalid UTF-8 sequences. Also add the necessary dependencies to run the static checks locally using act (https://github.com/nektos/act) with the Medium size image. --- .github/workflows/static_checks.yml | 4 ++-- misc/scripts/file_format.sh | 27 ++++++++++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/.github/workflows/static_checks.yml b/.github/workflows/static_checks.yml index 3e84702d7f7..5b4de06e9e6 100644 --- a/.github/workflows/static_checks.yml +++ b/.github/workflows/static_checks.yml @@ -24,8 +24,8 @@ jobs: - name: Install dependencies run: | - sudo apt-get install -qq dos2unix recode clang-format-13 libxml2-utils - sudo update-alternatives --remove-all clang-format + sudo apt-get install -qq dos2unix recode clang-format-13 libxml2-utils python3-pip moreutils + sudo update-alternatives --remove-all clang-format || true sudo update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-13 100 sudo pip3 install black==22.3.0 pygments pytest diff --git a/misc/scripts/file_format.sh b/misc/scripts/file_format.sh index 731b3ee0051..1200b96ea0f 100755 --- a/misc/scripts/file_format.sh +++ b/misc/scripts/file_format.sh @@ -41,7 +41,7 @@ while IFS= read -rd '' f; do continue fi # Ensure that files are UTF-8 formatted. - recode UTF-8 "$f" 2> /dev/null + isutf8 "$f" >> utf8-validation.txt 2>&1 # Ensure that files have LF line endings and do not contain a BOM. dos2unix "$f" 2> /dev/null # Remove trailing space characters and ensures that files end @@ -51,15 +51,28 @@ done diff=$(git diff --color) -# If no diff has been generated all is OK, clean up, and exit. -if [ -z "$diff" ] ; then +# If no UTF-8 violations were collected and no diff has been +# generated all is OK, clean up, and exit. 
+if [ ! -s utf8-validation.txt ] && [ -z "$diff" ] ; then printf "Files in this commit comply with the formatting rules.\n" + rm -f utf8-validation.txt exit 0 fi -# A diff has been created, notify the user, clean up, and exit. -printf "\n*** The following differences were found between the code " -printf "and the formatting rules:\n\n" -echo "$diff" +# Violations detected, notify the user, clean up, and exit. +if [ -s utf8-validation.txt ] +then + printf "\n*** The following files contain invalid UTF-8 character sequences:\n\n" + cat utf8-validation.txt + rm -f utf8-validation.txt +fi + +if [ ! -z "$diff" ] +then + printf "\n*** The following differences were found between the code " + printf "and the formatting rules:\n\n" + echo "$diff" +fi + printf "\n*** Aborting, please fix your commit(s) with 'git commit --amend' or 'git rebase -i '\n" exit 1