summaryrefslogtreecommitdiff
path: root/ext/json/parser/parser.c
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2024-11-06 12:58:23 +0100
committerJean Boussier <[email protected]>2024-11-06 23:31:30 +0100
commitd188a6883fead74d3468da96c0e3d05efd759a1e (patch)
tree10f6bb4fa568772be17e200f2d7db780696032b1 /ext/json/parser/parser.c
parent29d76d8c8b9e45f51a2fa6819b3854f2423239b0 (diff)
[ruby/json] Implement a fast path for integer parsing
`rb_cstr2inum` isn't very fast because it handles tons of different scenarios, and it also requires a NUL-terminated string, which forces us to copy the number into a secondary buffer. But since the parser has already computed the length, we can do this much more cheaply with a very simple function, as long as the number is small enough to fit into a native type (`long long`). If the number is too long, we can fall back to the `rb_cstr2inum` slow path. Before: ``` == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 40.000 i/100ms oj 35.000 i/100ms Oj::Parser 45.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 425.941 (± 1.9%) i/s (2.35 ms/i) - 2.160k in 5.072833s oj 349.617 (± 1.7%) i/s (2.86 ms/i) - 1.750k in 5.006953s Oj::Parser 464.767 (± 1.7%) i/s (2.15 ms/i) - 2.340k in 5.036381s rapidjson 382.413 (± 2.4%) i/s (2.61 ms/i) - 1.938k in 5.070757s Comparison: json: 425.9 i/s Oj::Parser: 464.8 i/s - 1.09x faster rapidjson: 382.4 i/s - 1.11x slower oj: 349.6 i/s - 1.22x slower ``` After: ``` == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 46.000 i/100ms oj 33.000 i/100ms Oj::Parser 45.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 462.332 (± 3.2%) i/s (2.16 ms/i) - 2.346k in 5.080504s oj 351.140 (± 1.1%) i/s (2.85 ms/i) - 1.782k in 5.075616s Oj::Parser 473.500 (± 1.3%) i/s (2.11 ms/i) - 2.385k in 5.037695s rapidjson 395.052 (± 3.5%) i/s (2.53 ms/i) - 1.989k in 5.042275s Comparison: json: 462.3 i/s Oj::Parser: 473.5 i/s - same-ish: difference falls within error rapidjson: 395.1 i/s - 1.17x slower oj: 351.1 i/s - 1.32x slower ``` https://github.com/ruby/json/commit/3a4dc9e1b4
Diffstat (limited to 'ext/json/parser/parser.c')
-rw-r--r--ext/json/parser/parser.c166
1 files changed, 96 insertions, 70 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index db9b13a689..a5c918fa12 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1488,20 +1488,42 @@ enum {JSON_integer_en_main = 1};
#line 695 "parser.rl"
+#define MAX_FAST_INTEGER_SIZE 18
+static inline VALUE fast_parse_integer(char *p, char *pe)
+{
+ bool negative = false;
+ if (*p == '-') {
+ negative = true;
+ p++;
+ }
+
+ long long memo = 0;
+ while (p < pe) {
+ memo *= 10;
+ memo += *p - '0';
+ p++;
+ }
+
+ if (negative) {
+ memo = -memo;
+ }
+ return LL2NUM(memo);
+}
+
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
int cs = EVIL;
-#line 1497 "parser.c"
+#line 1519 "parser.c"
{
cs = JSON_integer_start;
}
-#line 702 "parser.rl"
+#line 724 "parser.rl"
json->memo = p;
-#line 1505 "parser.c"
+#line 1527 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1542,7 +1564,7 @@ st4:
if ( ++p == pe )
goto _test_eof4;
case 4:
-#line 1546 "parser.c"
+#line 1568 "parser.c"
goto st0;
st5:
if ( ++p == pe )
@@ -1561,14 +1583,18 @@ case 5:
_out: {}
}
-#line 704 "parser.rl"
+#line 726 "parser.rl"
if (cs >= JSON_integer_first_final) {
long len = p - json->memo;
- fbuffer_clear(&json->fbuffer);
- fbuffer_append(&json->fbuffer, json->memo, len);
- fbuffer_append_char(&json->fbuffer, '\0');
- *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
+ *result = fast_parse_integer(json->memo, p);
+ } else {
+ fbuffer_clear(&json->fbuffer);
+ fbuffer_append(&json->fbuffer, json->memo, len);
+ fbuffer_append_char(&json->fbuffer, '\0');
+ *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
+ }
return p + 1;
} else {
return NULL;
@@ -1576,7 +1602,7 @@ case 5:
}
-#line 1580 "parser.c"
+#line 1606 "parser.c"
enum {JSON_float_start = 1};
enum {JSON_float_first_final = 8};
enum {JSON_float_error = 0};
@@ -1584,7 +1610,7 @@ enum {JSON_float_error = 0};
enum {JSON_float_en_main = 1};
-#line 729 "parser.rl"
+#line 755 "parser.rl"
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -1592,15 +1618,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL;
-#line 1596 "parser.c"
+#line 1622 "parser.c"
{
cs = JSON_float_start;
}
-#line 736 "parser.rl"
+#line 762 "parser.rl"
json->memo = p;
-#line 1604 "parser.c"
+#line 1630 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1658,14 +1684,14 @@ case 8:
goto st0;
goto tr9;
tr9:
-#line 723 "parser.rl"
+#line 749 "parser.rl"
{ p--; {p++; cs = 9; goto _out;} }
goto st9;
st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
-#line 1669 "parser.c"
+#line 1695 "parser.c"
goto st0;
st5:
if ( ++p == pe )
@@ -1726,7 +1752,7 @@ case 7:
_out: {}
}
-#line 738 "parser.rl"
+#line 764 "parser.rl"
if (cs >= JSON_float_first_final) {
VALUE mod = Qnil;
@@ -1779,7 +1805,7 @@ case 7:
-#line 1783 "parser.c"
+#line 1809 "parser.c"
enum {JSON_array_start = 1};
enum {JSON_array_first_final = 22};
enum {JSON_array_error = 0};
@@ -1787,7 +1813,7 @@ enum {JSON_array_error = 0};
enum {JSON_array_en_main = 1};
-#line 815 "parser.rl"
+#line 841 "parser.rl"
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -1800,14 +1826,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
long stack_head = json->stack->head;
-#line 1804 "parser.c"
+#line 1830 "parser.c"
{
cs = JSON_array_start;
}
-#line 827 "parser.rl"
+#line 853 "parser.rl"
-#line 1811 "parser.c"
+#line 1837 "parser.c"
{
short _widec;
if ( p == pe )
@@ -1847,7 +1873,7 @@ case 2:
goto st2;
goto st0;
tr2:
-#line 795 "parser.rl"
+#line 821 "parser.rl"
{
VALUE v = Qnil;
char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -1862,12 +1888,12 @@ st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
-#line 1866 "parser.c"
+#line 1892 "parser.c"
_widec = (*p);
if ( 44 <= (*p) && (*p) <= 44 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
switch( _widec ) {
@@ -1914,14 +1940,14 @@ case 7:
goto st3;
goto st7;
tr4:
-#line 807 "parser.rl"
+#line 833 "parser.rl"
{ p--; {p++; cs = 22; goto _out;} }
goto st22;
st22:
if ( ++p == pe )
goto _test_eof22;
case 22:
-#line 1925 "parser.c"
+#line 1951 "parser.c"
goto st0;
st8:
if ( ++p == pe )
@@ -1989,13 +2015,13 @@ case 13:
if ( 10 <= (*p) && (*p) <= 10 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) >= 9 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) > 13 ) {
@@ -2003,19 +2029,19 @@ case 13:
if ( 47 <= (*p) && (*p) <= 47 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) >= 32 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
switch( _widec ) {
@@ -2054,13 +2080,13 @@ case 14:
if ( 47 <= (*p) && (*p) <= 47 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) >= 42 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
switch( _widec ) {
@@ -2079,20 +2105,20 @@ case 15:
if ( (*p) <= 41 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) > 42 ) {
if ( 43 <= (*p) )
{ _widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
switch( _widec ) {
@@ -2115,13 +2141,13 @@ case 16:
if ( 42 <= (*p) && (*p) <= 42 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) > 46 ) {
@@ -2129,19 +2155,19 @@ case 16:
if ( 48 <= (*p) )
{ _widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) >= 47 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
switch( _widec ) {
@@ -2165,20 +2191,20 @@ case 17:
if ( (*p) <= 9 ) {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else if ( (*p) > 10 ) {
if ( 11 <= (*p) )
{ _widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
} else {
_widec = (short)(128 + ((*p) - -128));
if (
-#line 805 "parser.rl"
+#line 831 "parser.rl"
json->allow_trailing_comma ) _widec += 256;
}
switch( _widec ) {
@@ -2250,7 +2276,7 @@ case 21:
_out: {}
}
-#line 828 "parser.rl"
+#line 854 "parser.rl"
if(cs >= JSON_array_first_final) {
long count = json->stack->head - stack_head;
@@ -2444,7 +2470,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
}
-#line 2448 "parser.c"
+#line 2474 "parser.c"
enum {JSON_string_start = 1};
enum {JSON_string_first_final = 9};
enum {JSON_string_error = 0};
@@ -2452,7 +2478,7 @@ enum {JSON_string_error = 0};
enum {JSON_string_en_main = 1};
-#line 1051 "parser.rl"
+#line 1077 "parser.rl"
static int
@@ -2473,15 +2499,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
VALUE match_string;
-#line 2477 "parser.c"
+#line 2503 "parser.c"
{
cs = JSON_string_start;
}
-#line 1071 "parser.rl"
+#line 1097 "parser.rl"
json->memo = p;
-#line 2485 "parser.c"
+#line 2511 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -2506,14 +2532,14 @@ case 2:
goto st0;
goto st2;
tr2:
-#line 1033 "parser.rl"
+#line 1059 "parser.rl"
{
*result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
{p = (( p + 1))-1;}
p--;
{p++; cs = 9; goto _out;}
}
-#line 1026 "parser.rl"
+#line 1052 "parser.rl"
{
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
{p = (( p + 1))-1;}
@@ -2522,7 +2548,7 @@ tr2:
}
goto st9;
tr6:
-#line 1026 "parser.rl"
+#line 1052 "parser.rl"
{
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
{p = (( p + 1))-1;}
@@ -2534,7 +2560,7 @@ st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
-#line 2538 "parser.c"
+#line 2564 "parser.c"
goto st0;
st3:
if ( ++p == pe )
@@ -2622,7 +2648,7 @@ case 8:
_out: {}
}
-#line 1073 "parser.rl"
+#line 1099 "parser.rl"
if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass;
@@ -2775,7 +2801,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
}
-#line 2779 "parser.c"
+#line 2805 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};
@@ -2783,7 +2809,7 @@ enum {JSON_error = 0};
enum {JSON_en_main = 1};
-#line 1239 "parser.rl"
+#line 1265 "parser.rl"
/*
@@ -2812,16 +2838,16 @@ static VALUE cParser_parse(VALUE self)
json->stack = &stack;
-#line 2816 "parser.c"
+#line 2842 "parser.c"
{
cs = JSON_start;
}
-#line 1267 "parser.rl"
+#line 1293 "parser.rl"
p = json->source;
pe = p + json->len;
-#line 2825 "parser.c"
+#line 2851 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -2855,7 +2881,7 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 1231 "parser.rl"
+#line 1257 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2865,7 +2891,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
-#line 2869 "parser.c"
+#line 2895 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@@ -2954,7 +2980,7 @@ case 9:
_out: {}
}
-#line 1270 "parser.rl"
+#line 1296 "parser.rl"
if (json->stack_handle) {
rvalue_stack_eagerly_release(json->stack_handle);
@@ -2990,16 +3016,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
json->stack = &stack;
-#line 2994 "parser.c"
+#line 3020 "parser.c"
{
cs = JSON_start;
}
-#line 1305 "parser.rl"
+#line 1331 "parser.rl"
p = json->source;
pe = p + json->len;
-#line 3003 "parser.c"
+#line 3029 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -3033,7 +3059,7 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 1231 "parser.rl"
+#line 1257 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -3043,7 +3069,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
-#line 3047 "parser.c"
+#line 3073 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@@ -3132,7 +3158,7 @@ case 9:
_out: {}
}
-#line 1308 "parser.rl"
+#line 1334 "parser.rl"
if (json->stack_handle) {
rvalue_stack_eagerly_release(json->stack_handle);