From 57ec53b8203f2227889160f5c32d14c9f18a02b6 Mon Sep 17 00:00:00 2001
From: ttozzi <devttozzi@gmail.com>
Date: Fri, 14 Mar 2025 04:51:30 +0900
Subject: [PATCH] Unify whitespace handling in linter and formatter

---
 Sources/SwiftFormat/PrettyPrint/Comment.swift | 26 ++++++++++++++-----
 .../PrettyPrint/WhitespaceLinter.swift        | 10 +------
 .../PrettyPrint/CommentTests.swift            | 13 ++++++++++
 .../PrettyPrint/WhitespaceLintTests.swift     | 25 ++++++++++++++++++
 4 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/Sources/SwiftFormat/PrettyPrint/Comment.swift b/Sources/SwiftFormat/PrettyPrint/Comment.swift
index 43616a5b4..76279694c 100644
--- a/Sources/SwiftFormat/PrettyPrint/Comment.swift
+++ b/Sources/SwiftFormat/PrettyPrint/Comment.swift
@@ -21,13 +21,27 @@ extension StringProtocol {
   /// - Returns: The string with trailing whitespace removed.
   func trimmingTrailingWhitespace() -> String {
     if isEmpty { return String() }
-    let scalars = unicodeScalars
-    var idx = scalars.index(before: scalars.endIndex)
-    while scalars[idx].properties.isWhitespace {
-      if idx == scalars.startIndex { return String() }
-      idx = scalars.index(before: idx)
+    let utf8Array = Array(utf8)  // Copy the bytes so they can be scanned backwards with an Int index.
+    var idx = utf8Array.endIndex - 1
+    while utf8Array[idx].isWhitespace {  // ASCII whitespace bytes only; see `UTF8.CodeUnit.isWhitespace`.
+      if idx == utf8Array.startIndex { return String() }  // Nothing but whitespace bytes.
+      idx -= 1
+    }
+    return String(decoding: utf8Array[...idx], as: UTF8.self)  // `idx` is the last non-whitespace byte.
+  }
+}
+
+extension UTF8.CodeUnit {
+  /// Whether this code unit is an ASCII whitespace byte: space, `\n`, `\t`, `\r`, VT, or FF.
+  ///
+  /// Every other byte, including those that encode U+2028 or U+2029, is not whitespace.
+  var isWhitespace: Bool {
+    switch self {
+    case UInt8(ascii: " "), UInt8(ascii: "\n"), UInt8(ascii: "\t"), UInt8(ascii: "\r"), /*VT*/ 0x0B, /*FF*/ 0x0C:
+      return true
+    default:
+      return false
     }
-    return String(String.UnicodeScalarView(scalars[...idx]))
   }
 }
 
diff --git a/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift b/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift
index 30f733952..d9d5178a3 100644
--- a/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift
+++ b/Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift
@@ -339,16 +339,8 @@ public class WhitespaceLinter {
     startingAt offset: Int,
     in data: [UTF8.CodeUnit]
   ) -> ArraySlice<UTF8.CodeUnit> {
-    func isWhitespace(_ char: UTF8.CodeUnit) -> Bool {
-      switch char {
-      case UInt8(ascii: " "), UInt8(ascii: "\n"), UInt8(ascii: "\t"), UInt8(ascii: "\r"), /*VT*/ 0x0B, /*FF*/ 0x0C:
-        return true
-      default:
-        return false
-      }
-    }
     guard
-      let whitespaceEnd = data[offset...].firstIndex(where: { !isWhitespace($0) })
+      let whitespaceEnd = data[offset...].firstIndex(where: { !$0.isWhitespace })
     else {
       return data[offset..<data.endIndex]
     }
diff --git a/Tests/SwiftFormatTests/PrettyPrint/CommentTests.swift b/Tests/SwiftFormatTests/PrettyPrint/CommentTests.swift
index 8d843a4c5..9a0118fd6 100644
--- a/Tests/SwiftFormatTests/PrettyPrint/CommentTests.swift
+++ b/Tests/SwiftFormatTests/PrettyPrint/CommentTests.swift
@@ -1094,4 +1094,17 @@ final class CommentTests: PrettyPrintTestCase {
 
     assertPrettyPrintEqual(input: input, expected: input, linelength: 80)
   }
+
+  func testUnexpectedUnicodeCharacters() {
+    let input =
+      """
+      // Hello World\u{2028}
+      // Hello\u{20}\u{2028}World
+      // Hello World\u{2028}\u{2029}\u{2029}
+      // Hello World\u{20}\u{20}\u{20}\u{2028}
+
+      """
+    // Comments ending in U+2028/U+2029 (non-ASCII separators) are expected to print unchanged.
+    assertPrettyPrintEqual(input: input, expected: input, linelength: 80)
+  }
 }
diff --git a/Tests/SwiftFormatTests/PrettyPrint/WhitespaceLintTests.swift b/Tests/SwiftFormatTests/PrettyPrint/WhitespaceLintTests.swift
index 2e20e4c89..2ffdb3afe 100644
--- a/Tests/SwiftFormatTests/PrettyPrint/WhitespaceLintTests.swift
+++ b/Tests/SwiftFormatTests/PrettyPrint/WhitespaceLintTests.swift
@@ -255,4 +255,29 @@ final class WhitespaceLintTests: WhitespaceTestCase {
       ]
     )
   }
+
+  func testUnexpectedUnicodeCharacters() {
+    assertWhitespaceLint(
+      input: """
+        // Hello World\u{2028}
+        // Hello\u{20}\u{2028}World
+        // Hello World\u{2028}\u{2029}\u{2029}
+        // Hello World              \u{2028}
+        // Hello World\u{2028}1️⃣\u{20}\u{20}\u{20}
+
+        """,
+      expected: """
+        // Hello World\u{2028}
+        // Hello\u{20}\u{2028}World
+        // Hello World\u{2028}\u{2029}\u{2029}
+        // Hello World              \u{2028}
+        // Hello World\u{2028}
+
+        """,
+      linelength: 30,
+      findings: [
+        FindingSpec("1️⃣", message: "remove trailing whitespace")  // the three ASCII spaces after U+2028
+      ]
+    )
+  }
 }