[ruby/json] SIMD: Match control char and double quote in one pass
author: Jean Boussier <[email protected]>
Mon, 28 Apr 2025 13:23:48 +0000 (28 15:23 +0200)
committer: Jean Boussier <[email protected]>
Wed, 30 Apr 2025 06:12:41 +0000 (30 08:12 +0200)
For an unsigned byte `c`, `c < 32 || c == 34` is equivalent to `(c ^ 2) < 33`.

Found in: https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/

The gains seem to show up mostly on micro-benchmarks, and even there they
aren't very consistent, but it's never slower.

```
== Encoding long string (124001 bytes)
ruby 3.4.2 (2025-02-15 revision https://github.com/ruby/json/commit/d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after     5.295k i/100ms
Calculating -------------------------------------
               after     55.796k (± 3.4%) i/s   (17.92 μs/i) -    280.635k in   5.035690s

Comparison:
              before:    49840.7 i/s
               after:    55795.8 i/s - 1.12x  faster
```

https://github.com/ruby/json/commit/034c5debd8

ext/json/generator/generator.c

index c090617..3112ef1 100644 (file)
@@ -320,14 +320,12 @@ static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
 {
     uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
 
-    const uint8x16_t lower_bound = vdupq_n_u8(' '); 
-    const uint8x16_t backslash   = vdupq_n_u8('\\');
-    const uint8x16_t dblquote    = vdupq_n_u8('\"');
+    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
+    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
+    const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
 
-    uint8x16_t too_low       = vcltq_u8(chunk, lower_bound);
-    uint8x16_t has_backslash = vceqq_u8(chunk, backslash);
-    uint8x16_t has_dblquote  = vceqq_u8(chunk, dblquote);
-    uint8x16_t needs_escape  = vorrq_u8(too_low, vorrq_u8(has_backslash, has_dblquote));
+    uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
+    uint8x16_t needs_escape  = vorrq_u8(too_low_or_dbl_quote, has_backslash);
 
     return neon_match_mask(needs_escape);
 }
@@ -467,14 +465,11 @@ static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
 {
     __m128i chunk         = _mm_loadu_si128((__m128i const*)ptr);
 
-    const __m128i lower_bound = _mm_set1_epi8(' '); 
-    const __m128i backslash   = _mm_set1_epi8('\\');
-    const __m128i dblquote    = _mm_set1_epi8('\"');
-
-    __m128i too_low       = _mm_cmplt_epu8(chunk, lower_bound);
-    __m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash);
-    __m128i has_dblquote  = _mm_cmpeq_epi8(chunk, dblquote);
-    __m128i needs_escape  = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote));
+    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
+    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
+    __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
+    __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
+    __m128i needs_escape  = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
     return _mm_movemask_epi8(needs_escape);
 }