diff options
author | Jean Boussier <[email protected]> | 2024-11-06 12:58:23 +0100 |
---|---|---|
committer | Jean Boussier <[email protected]> | 2024-11-06 23:31:30 +0100 |
commit | d188a6883fead74d3468da96c0e3d05efd759a1e (patch) | |
tree | 10f6bb4fa568772be17e200f2d7db780696032b1 /ext/json/parser/parser.rl | |
parent | 29d76d8c8b9e45f51a2fa6819b3854f2423239b0 (diff) |
[ruby/json] Implement a fast path for integer parsing
`rb_cstr2inum` isn't very fast because it handles tons of
different scenarios, and it also requires a NUL-terminated string,
which forces us to copy the number into a secondary buffer.
But since the parser already computed the length, we can much more
cheaply do this with a very simple function as long as the number
is small enough to fit into a native type (`long long`).
If the number is too long, we can fall back to the `rb_cstr2inum`
slow path.
Before:
```
== Parsing citm_catalog.json (1727030 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
json 40.000 i/100ms
oj 35.000 i/100ms
Oj::Parser 45.000 i/100ms
rapidjson 38.000 i/100ms
Calculating -------------------------------------
json 425.941 (± 1.9%) i/s (2.35 ms/i) - 2.160k in 5.072833s
oj 349.617 (± 1.7%) i/s (2.86 ms/i) - 1.750k in 5.006953s
Oj::Parser 464.767 (± 1.7%) i/s (2.15 ms/i) - 2.340k in 5.036381s
rapidjson 382.413 (± 2.4%) i/s (2.61 ms/i) - 1.938k in 5.070757s
Comparison:
json: 425.9 i/s
Oj::Parser: 464.8 i/s - 1.09x faster
rapidjson: 382.4 i/s - 1.11x slower
oj: 349.6 i/s - 1.22x slower
```
After:
```
== Parsing citm_catalog.json (1727030 bytes)
ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
json 46.000 i/100ms
oj 33.000 i/100ms
Oj::Parser 45.000 i/100ms
rapidjson 39.000 i/100ms
Calculating -------------------------------------
json 462.332 (± 3.2%) i/s (2.16 ms/i) - 2.346k in 5.080504s
oj 351.140 (± 1.1%) i/s (2.85 ms/i) - 1.782k in 5.075616s
Oj::Parser 473.500 (± 1.3%) i/s (2.11 ms/i) - 2.385k in 5.037695s
rapidjson 395.052 (± 3.5%) i/s (2.53 ms/i) - 1.989k in 5.042275s
Comparison:
json: 462.3 i/s
Oj::Parser: 473.5 i/s - same-ish: difference falls within error
rapidjson: 395.1 i/s - 1.17x slower
oj: 351.1 i/s - 1.32x slower
```
https://github.com/ruby/json/commit/3a4dc9e1b4
Diffstat (limited to 'ext/json/parser/parser.rl')
-rw-r--r-- | ext/json/parser/parser.rl | 34 |
1 files changed, 30 insertions, 4 deletions
```diff
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
index 06f23fa8b1..9620b1964e 100644
--- a/ext/json/parser/parser.rl
+++ b/ext/json/parser/parser.rl
@@ -694,6 +694,28 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
     main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
 }%%
 
+#define MAX_FAST_INTEGER_SIZE 18
+static inline VALUE fast_parse_integer(char *p, char *pe)
+{
+    bool negative = false;
+    if (*p == '-') {
+        negative = true;
+        p++;
+    }
+
+    long long memo = 0;
+    while (p < pe) {
+        memo *= 10;
+        memo += *p - '0';
+        p++;
+    }
+
+    if (negative) {
+        memo = -memo;
+    }
+    return LL2NUM(memo);
+}
+
 static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
 {
     int cs = EVIL;
@@ -704,10 +726,14 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
 
     if (cs >= JSON_integer_first_final) {
         long len = p - json->memo;
-        fbuffer_clear(&json->fbuffer);
-        fbuffer_append(&json->fbuffer, json->memo, len);
-        fbuffer_append_char(&json->fbuffer, '\0');
-        *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
+        if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
+            *result = fast_parse_integer(json->memo, p);
+        } else {
+            fbuffer_clear(&json->fbuffer);
+            fbuffer_append(&json->fbuffer, json->memo, len);
+            fbuffer_append_char(&json->fbuffer, '\0');
+            *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
+        }
         return p + 1;
     } else {
         return NULL;
```