[RFC PATCH] scripts/prettify: add script to prettify code

From: zsugabubus
Date: Wed Apr 22 2020 - 16:10:48 EST


Mix and mess of tabs and spaces are all around in the code. This script
is for here to correct those >1.5 million lines where code does not match
kernel coding style.

Usage of `prettify` is pretty simple: It expects a directory you would
like to prettify (default: .) then watch it go.

---
scripts/prettify | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
scripts/retab | 28 +++++++++++++++
2 files changed, 122 insertions(+)
create mode 100755 scripts/prettify
create mode 100755 scripts/retab

diff --git a/scripts/prettify b/scripts/prettify
new file mode 100755
index 000000000..3c725ec5c
--- /dev/null
+++ b/scripts/prettify
@@ -0,0 +1,94 @@
+#!/usr/bin/sh -eu
+# SPDX-License-Identifier: GPL-3.0-or-later
+TREE=${1:-.}
+
+XARGS="xargs -rP $(nproc 2>/dev/null || echo 2) -n 5000"
+
+export RETAB_TABWIDTH=8
+
+# Using BEGIN and END we can easily filter which lines we want to retab.
+# However, we have to hack a little bit more to ignore characters from the
+# line, e.g. ones inside comments. These space characters, we do not
+# want to retab, are substituted with '\n' (an extremal character) at
+# the beginning and after retab finished they are substituted back.
+
+trap '
+echo >&2 warning: sed temporary files may remained. Please remove them manually:
+find >&2 "$TREE" -name "sed*"
+' INT TERM
+
+true &&
+echo 'Pass: Enforce Coding Standards' &&
+find "$TREE" -type f -name '*.[ch]' \
+| $XARGS sed -i -E '
+# Trailing white.
+s/\s{1,}$//
+# Move # to the first column.
+s/^(\s{1,})#(\s*)/#\1\2/
+
+# Spacing around keywords.
+s/^(\s{1,}(if|else if|switch|for|while))\s*\(\s*/\1 (/
+s/(sizeof|typeof|alignof|__attribute__)\s*\(\s*/\1(/
+
+# Open bracket spacing.
+/^\s*(if|\}?\s*else if|switch|for|while) / s/\s*\)$/)/;
+/^\s*(if|\}?\s*else if|switch|for|while) / s/\s*\)\s*\{$/) {/;
+s/\}else/} else/
+
+# // -> /* */ comments.
+s/((^|\s*[a-z_]{1,}:|.*;)\s*)\/\/\s*(.*)$/\1\/* \3 *\//
+'
+
+# Converts the random sequence of spaces and tabs at the first lines of
+# #defines to tabs and only keeps spaces if needed for the alignment.
+# When people did not use tabs for indentation, at 7/8 of the cases, some
+# spaces will remain, because they put their code at non-tabstop column. Hurray!
+# Maybe it could be corrected in another pass that aligns whole blocks.
+true &&
+echo 'Pass: Retab #defines' &&
+export RETAB_BEGIN='/^#\s*define\s*[A-Za-z_]{1,}.*$/ {
+:right
+ s/((\/\*|\().*) /\1\n/g
+t right;' &&
+export RETAB_END='s/\n/ /g; }' &&
+find "$TREE" -type f -name '*.h' \
+| $XARGS ./retab
+
+# Convert space indentation to tab indentation.
+true &&
+echo 'Pass: Retab Indentations' &&
+export RETAB_BEGIN='/^[ \t]{2,}[^ \t*]/ {
+:loop;
+ s/^([ \t]*[^ ]{1,}) /\1\n/g;
+t loop;' &&
+export RETAB_END='s/\n/ /g; }' &&
+find "$TREE" -type f -name '*.[ch]' \
+| $XARGS ./retab
+
+# Someone people cannot fking count to 8... or more precisely to 1 tab, so they
+# hit space 6 or 7 times (randomly) to indent their codes. Note that these
+# partial tabs could not be converted to tabs in the previous passes.
+true &&
+echo 'Pass: ++Space -> Tab at Line Starts' &&
+find "$TREE" -type f -name '*.[ch]' \
+| $XARGS sed -i -E '
+:next
+# Skip fors spanning multiple lines.
+/^\s*for .*;$/ {
+ n; n; b next
+}
+
+# Repeat replace on current line.
+:rep
+ s/^(\t{,4}) {5,7}([_a-zA-Z{}]|\/\*)/\1\t\2/g
+t rep
+
+# Skip to the next statement.
+/^$|\*\/$|[){};]$/ { n; b next }
+:skip
+/[{};]$/! { n; b skip }
+'
+
+trap '' INT TERM
+
+echo 'Done. :)'
diff --git a/scripts/retab b/scripts/retab
new file mode 100755
index 000000000..caf1cd58a
--- /dev/null
+++ b/scripts/retab
@@ -0,0 +1,28 @@
+#!/usr/bin/sh -eu
+# SPDX-License-Identifier: GPL-3.0-or-later
+# [ RETAB_TABWIDTH=8 ] sed "[ $RETAB_BEGIN ] { retab logic } [ $RETAB_END ]" [ FILE... ]
+
+: ${RETAB_TABWIDTH:=8}
+
+IFS=
+# An extremal character.
+marker=$(printf '\x01')
+
+exec sed --in-place -E "${RETAB_BEGIN:-}"'{
+ # Mark tabstops.
+ :tabstop
+ s/^(([^\t]{'"$RETAB_TABWIDTH"'}|.*\t)*[^\t]{'"$((RETAB_TABWIDTH-1))"'}) /\1'"$marker"'/
+ t tabstop
+
+ # Do replace space with a one-width tabulator if it would be the only change
+ # in that whitespace chunk.
+ s/([^ ])'"$marker"'([ \t]*[^ \t'"$marker"'])/\1 \2/g
+
+ # Place tabulators at tabstops.
+ s/ {,'"$((RETAB_TABWIDTH-1))"'}'"$marker"'/\t/g
+
+ # Remove needless space(s) before tabulators.
+ :spc
+ s/^(([^\t]{'"$RETAB_TABWIDTH"'}|.*\t)*[^\t]{,'"$((RETAB_TABWIDTH-2))"'}) \t/\1\t/g
+ t spc
+};'"${RETAB_END:-}" -- "$@"
--
2.26.1