diff options
author | Jean Boussier <[email protected]> | 2024-10-31 14:47:17 +0100 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2024-11-01 13:04:24 +0900 |
commit | 165cc6cf40b684be94d91c9a595754cb6465551c (patch) | |
tree | 608762376d6c5baa3bd3306988accd4f08b52b27 /ext/json/parser/parser.rl | |
parent | 081689b9e2cac65b664117802d01824303ae83c5 (diff) |
[ruby/json] json_string_unescape: assume the string doesn't need escaping
If that assumption holds true, then we don't need to copy the
string into a buffer to unescape it. For small strings it just saves
copying, but for large ones it also saves a malloc/free combo.
Before:
```
== Parsing twitter.json (567916 bytes)
ruby 3.3.4 (2024-07-09 revision https://github.com/ruby/json/commit/be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json 52.000 i/100ms
oj 61.000 i/100ms
oj strict 70.000 i/100ms
Oj::Parser 71.000 i/100ms
rapidjson 55.000 i/100ms
Calculating -------------------------------------
json 510.111 (± 2.9%) i/s (1.96 ms/i) - 2.548k in 5.000029s
oj 610.232 (± 3.1%) i/s (1.64 ms/i) - 3.050k in 5.003725s
oj strict 713.231 (± 3.2%) i/s (1.40 ms/i) - 3.570k in 5.010902s
Oj::Parser 762.598 (± 3.0%) i/s (1.31 ms/i) - 3.834k in 5.033130s
rapidjson 553.029 (± 7.4%) i/s (1.81 ms/i) - 2.750k in 5.022630s
Comparison:
json: 510.1 i/s
Oj::Parser: 762.6 i/s - 1.49x faster
oj strict: 713.2 i/s - 1.40x faster
oj: 610.2 i/s - 1.20x faster
rapidjson: 553.0 i/s - same-ish: difference falls within error
== Parsing citm_catalog.json (1727030 bytes)
ruby 3.3.4 (2024-07-09 revision https://github.com/ruby/json/commit/be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json 28.000 i/100ms
oj 33.000 i/100ms
oj strict 37.000 i/100ms
Oj::Parser 43.000 i/100ms
rapidjson 38.000 i/100ms
Calculating -------------------------------------
json 303.853 (± 3.6%) i/s (3.29 ms/i) - 1.540k in 5.076079s
oj 348.009 (± 2.0%) i/s (2.87 ms/i) - 1.749k in 5.027738s
oj strict 396.679 (± 3.3%) i/s (2.52 ms/i) - 1.998k in 5.042271s
Oj::Parser 406.699 (± 2.2%) i/s (2.46 ms/i) - 2.064k in 5.077587s
rapidjson 393.463 (± 3.3%) i/s (2.54 ms/i) - 1.976k in 5.028501s
Comparison:
json: 303.9 i/s
Oj::Parser: 406.7 i/s - 1.34x faster
oj strict: 396.7 i/s - 1.31x faster
rapidjson: 393.5 i/s - 1.29x faster
oj: 348.0 i/s - 1.15x faster
```
After:
```
== Parsing twitter.json (567916 bytes)
ruby 3.3.4 (2024-07-09 revision https://github.com/ruby/json/commit/be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json 56.000 i/100ms
oj 62.000 i/100ms
oj strict 72.000 i/100ms
Oj::Parser 77.000 i/100ms
rapidjson 55.000 i/100ms
Calculating -------------------------------------
json 568.025 (± 2.1%) i/s (1.76 ms/i) - 2.856k in 5.030272s
oj 630.936 (± 1.4%) i/s (1.58 ms/i) - 3.162k in 5.012630s
oj strict 705.784 (±11.2%) i/s (1.42 ms/i) - 3.456k in 5.006706s
Oj::Parser 783.989 (± 1.7%) i/s (1.28 ms/i) - 3.927k in 5.010343s
rapidjson 557.630 (± 2.0%) i/s (1.79 ms/i) - 2.805k in 5.032388s
Comparison:
json: 568.0 i/s
Oj::Parser: 784.0 i/s - 1.38x faster
oj strict: 705.8 i/s - 1.24x faster
oj: 630.9 i/s - 1.11x faster
rapidjson: 557.6 i/s - same-ish: difference falls within error
== Parsing citm_catalog.json (1727030 bytes)
ruby 3.3.4 (2024-07-09 revision https://github.com/ruby/json/commit/be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json 29.000 i/100ms
oj 33.000 i/100ms
oj strict 38.000 i/100ms
Oj::Parser 43.000 i/100ms
rapidjson 37.000 i/100ms
Calculating -------------------------------------
json 319.271 (± 3.1%) i/s (3.13 ms/i) - 1.595k in 5.001128s
oj 347.946 (± 1.7%) i/s (2.87 ms/i) - 1.749k in 5.028395s
oj strict 396.914 (± 3.0%) i/s (2.52 ms/i) - 2.014k in 5.079645s
Oj::Parser 409.311 (± 2.7%) i/s (2.44 ms/i) - 2.064k in 5.046626s
rapidjson 394.752 (± 1.5%) i/s (2.53 ms/i) - 1.998k in 5.062776s
Comparison:
json: 319.3 i/s
Oj::Parser: 409.3 i/s - 1.28x faster
oj strict: 396.9 i/s - 1.24x faster
rapidjson: 394.8 i/s - 1.24x faster
oj: 347.9 i/s - 1.09x faster
```
https://github.com/ruby/json/commit/7e0f66546a
Diffstat (limited to 'ext/json/parser/parser.rl')
-rw-r--r-- | ext/json/parser/parser.rl | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
index 35a9766ddb..808e557488 100644
--- a/ext/json/parser/parser.rl
+++ b/ext/json/parser/parser.rl
@@ -461,7 +461,7 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
     }
 }
 
-static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize)
+static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
 {
     if (symbolize) {
         intern = true;
@@ -469,12 +469,12 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo
     VALUE result;
 # ifdef HAVE_RB_ENC_INTERNED_STR
     if (intern) {
-      result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
+      result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding());
     } else {
-      result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
+      result = rb_utf8_str_new(start, (long)(end - start));
     }
 # else
-    result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
+    result = rb_utf8_str_new(start, (long)(end - start));
     if (intern) {
   # if STR_UMINUS_DEDUPE_FROZEN
       // Starting from MRI 3.0 it is preferable to freeze the string
@@ -499,7 +499,7 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo
 }
 
 static const size_t MAX_STACK_BUFFER_SIZE = 128;
-static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
+static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
 {
     VALUE result = Qnil;
     size_t bufferSize = stringEnd - string;
@@ -507,6 +507,11 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
     int unescape_len;
     char buf[4];
 
+    pe = memchr(p, '\\', bufferSize);
+    if (RB_LIKELY(pe == NULL)) {
+        return build_string(string, stringEnd, intern, symbolize);
+    }
+
     if (bufferSize > MAX_STACK_BUFFER_SIZE) {
 # ifdef HAVE_RB_ENC_INTERNED_STR
       bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
@@ -609,7 +614,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
       buffer += pe - p;
     }
 
-    result = build_string(buffer, bufferStart, intern, symbolize);
+    result = build_string(bufferStart, buffer, intern, symbolize);
 
     if (bufferSize > MAX_STACK_BUFFER_SIZE) {
       ruby_xfree(bufferStart);