[PATCH RESEND] Add comprehensive detection and automatic fixing capability for full-width (Unicode) characters that are commonly mistaken for ASCII punctuation marks. This helps catch input method editor artifacts that can cause compilation errors or formatting issues.

From: Morduang Zang
Date: Fri Aug 15 2025 - 03:56:41 EST


The implementation detects 25 types of full-width characters:
- Basic punctuation: ；，。（）！？： and the ideographic space (U+3000)
- Programming brackets: ［］｛｝＜＞
- Assignment and comparison: ＝
- Arithmetic operators: ＋－＊／＼
- Other programming symbols: ％＃＆｜

Detection covers three areas:
1. Code lines (lines starting with '+') - FULLWIDTH_CHARS
2. Commit messages - FULLWIDTH_CHARS_COMMIT
3. Subject lines - FULLWIDTH_CHARS_SUBJECT

Example usage:
./scripts/checkpatch.pl my_patch.patch
./scripts/checkpatch.pl --fix my_patch.patch
./scripts/checkpatch.pl --fix-inplace my_source.c

Signed-off-by: Morduang Zang <zhangdandan@xxxxxxxxxxxxx>
Signed-off-by: Wangyuli <wangyuli@xxxxxxxxxxxxx>
---
scripts/checkpatch.pl | 84 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 84 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e722dd6fa8ef..f4cb547a470b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -75,6 +75,41 @@ my $git_command ='export LANGUAGE=en_US.UTF-8; git';
my $tabsize = 8;
my ${CONFIG_} = "CONFIG_";

+# Full-width characters keyed by their UTF-8 byte sequence: [ASCII replacement, name, glyph].
+# The glyph is spelled as byte escapes so this table is pure ASCII and cannot trip the check itself.
+my %fullwidth_chars = (
+	# Basic punctuation
+	"\xef\xbc\x9b" => [";", "semicolon", "\xef\xbc\x9b"],
+	"\xef\xbc\x8c" => [",", "comma", "\xef\xbc\x8c"],
+	"\xe3\x80\x82" => [".", "period", "\xe3\x80\x82"],
+	"\xef\xbc\x88" => ["(", "opening parenthesis", "\xef\xbc\x88"],
+	"\xef\xbc\x89" => [")", "closing parenthesis", "\xef\xbc\x89"],
+	"\xef\xbc\x81" => ["!", "exclamation mark", "\xef\xbc\x81"],
+	"\xef\xbc\x9f" => ["?", "question mark", "\xef\xbc\x9f"],
+	"\xef\xbc\x9a" => [":", "colon", "\xef\xbc\x9a"],
+	"\xe3\x80\x80" => [" ", "space", "\xe3\x80\x80"],
+	# Programming brackets
+	"\xef\xbc\xbb" => ["[", "left square bracket", "\xef\xbc\xbb"],
+	"\xef\xbc\xbd" => ["]", "right square bracket", "\xef\xbc\xbd"],
+	"\xef\xbd\x9b" => ["{", "left curly bracket", "\xef\xbd\x9b"],
+	"\xef\xbd\x9d" => ["}", "right curly bracket", "\xef\xbd\x9d"],
+	"\xef\xbc\x9c" => ["<", "less-than sign", "\xef\xbc\x9c"],
+	"\xef\xbc\x9e" => [">", "greater-than sign", "\xef\xbc\x9e"],
+	# Assignment, comparison and arithmetic operators
+	"\xef\xbc\x9d" => ["=", "equals sign", "\xef\xbc\x9d"],
+	"\xef\xbc\x8b" => ["+", "plus sign", "\xef\xbc\x8b"],
+	"\xef\xbc\x8d" => ["-", "minus sign", "\xef\xbc\x8d"],
+	"\xef\xbc\x8a" => ["*", "asterisk", "\xef\xbc\x8a"],
+	"\xef\xbc\x8f" => ["/", "solidus", "\xef\xbc\x8f"],
+	"\xef\xbc\xbc" => ["\\", "reverse solidus", "\xef\xbc\xbc"],
+	# Other programming symbols
+	"\xef\xbc\x85" => ["%", "percent sign", "\xef\xbc\x85"],
+	"\xef\xbc\x83" => ["#", "number sign", "\xef\xbc\x83"],
+	"\xef\xbc\x86" => ["&", "ampersand", "\xef\xbc\x86"],
+	"\xef\xbd\x9c" => ["|", "vertical line", "\xef\xbd\x9c"],
+);
+my $fullwidth_pattern = join('|', map { quotemeta($_) } keys %fullwidth_chars);	# alternation of every key above
+
my %maybe_linker_symbol; # for externs in c exceptions, when seen in *vmlinux.lds.h

sub help {
@@ -1019,6 +1054,40 @@ sub read_words {
return 0;
}

+# Warn about every full-width character in $line and optionally fix the line.
+#   $line         - raw line to scan (UTF-8 bytes)
+#   $context      - text inserted after "found" in each message, e.g. " in subject line"
+#   $warning_type - message type handed to WARN()
+#   $apply_fix    - true to also rewrite $fixed_ref->[$fixlinenr]
+#   $fixlinenr    - index of this line in @$fixed_ref
+#   $fixed_ref    - reference to the array of fixed lines, or undef when not fixing
+#   $herecurr     - location text appended to each message
+# Returns the number of full-width characters found, whether or not they were reported.
+sub check_fullwidth_chars {
+	my ($line, $context, $warning_type, $apply_fix, $fixlinenr, $fixed_ref, $herecurr) = @_;
+	my $found = 0;
+	my $reported = 0;
+
+	# Every alternative of $fullwidth_pattern is a key of %fullwidth_chars,
+	# so each match is guaranteed to have a table entry.
+	while ($line =~ /($fullwidth_pattern)/g) {
+		my ($ascii_char, $name, $fullwidth_char) = @{$fullwidth_chars{$1}};
+		$found++;
+		# Assumes WARN() returns true only when the message is actually
+		# emitted, as the "if (ERROR(..." call sites suggest - confirm.
+		$reported = 1 if WARN($warning_type,
+			"Full-width $name ($fullwidth_char) found$context, use ASCII $name ($ascii_char) instead\n" . $herecurr);
+	}
+
+	# Substitute in the working copy rather than overwriting it with a
+	# rewrite of $line, so fixes already made to this line are preserved.
+	if ($reported && $apply_fix && defined $fixed_ref) {
+		$fixed_ref->[$fixlinenr] =~ s/($fullwidth_pattern)/$fullwidth_chars{$1}[0]/ge;
+	}
+
+	return $found;
+}
+
my $const_structs;
if (show_type("CONST_STRUCT")) {
read_words(\$const_structs, $conststructsfile)
@@ -2961,6 +3030,11 @@ sub process {
$commit_log_has_diff = 1;
}

+# Check for full-width characters in commit message
+ if ($in_commit_log && show_type("FULLWIDTH_CHARS_COMMIT")) {
+ check_fullwidth_chars($rawline, " in commit message", "FULLWIDTH_CHARS_COMMIT", 0, 0, undef, $herecurr);
+ }
+
# Check for incorrect file permissions
if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
my $permhere = $here . "FILE: $realfile\n";
@@ -3266,6 +3340,11 @@ sub process {
"A patch subject line should describe the change not the tool that found it\n" . $herecurr);
}

+# Check for full-width characters in Subject line
+ if ($in_header_lines && $line =~ /^Subject:/i && show_type("FULLWIDTH_CHARS_SUBJECT")) {
+ check_fullwidth_chars($rawline, " in subject line", "FULLWIDTH_CHARS_SUBJECT", 0, 0, undef, $herecurr);
+ }
+
# Check for Gerrit Change-Ids not in any patch context
if ($realfile eq '' && !$has_patch_separator && $line =~ /^\s*change-id:/i) {
if (ERROR("GERRIT_CHANGE_ID",
@@ -3974,6 +4053,11 @@ sub process {
}
}

+# check for full-width characters (full-width punctuation marks, etc.)
+ if ($rawline =~ /^\+/ && show_type("FULLWIDTH_CHARS")) {
+ check_fullwidth_chars($rawline, "", "FULLWIDTH_CHARS", $fix, $fixlinenr, \@fixed, $herecurr);
+ }
+
# check multi-line statement indentation matches previous line
if ($perl_version_ok &&
$prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
--
2.20.1