diff options
author | Jean Boussier <[email protected]> | 2024-11-05 19:57:46 +0100 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2024-11-26 15:11:05 +0900 |
commit | ee0de3fd4e02f95f42fd3fe9cb18bcfe3e7e2bf1 (patch) | |
tree | 4063efad59cae2b049ad2261e86bcffba78aca6e | |
parent | 519701657fad62799045f6230f79de1941cc3c76 (diff) |
[ruby/json] JSON.dump: write directly into the provided IO
Ref: https://github.com/ruby/json/issues/524
Rather than to buffer everything in memory.
Unfortunately Ruby doesn't provide an API to write into
and IO without first allocating a string, which is a bit
wasteful.
https://github.com/ruby/json/commit/f017af6c0a
-rw-r--r-- | ext/json/fbuffer/fbuffer.h | 62 | ||||
-rw-r--r-- | ext/json/generator/generator.c | 49 | ||||
-rw-r--r-- | ext/json/lib/json/common.rb | 17 | ||||
-rw-r--r-- | ext/json/lib/json/ext/generator/state.rb | 11 | ||||
-rw-r--r-- | test/json/json_common_interface_test.rb | 11 |
5 files changed, 98 insertions, 52 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index 3e154a5fa8..0774c7e464 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -46,9 +46,11 @@ typedef struct FBufferStruct { unsigned long len; unsigned long capa; char *ptr; + VALUE io; } FBuffer; #define FBUFFER_STACK_SIZE 512 +#define FBUFFER_IO_BUFFER_SIZE (16384 - 1) #define FBUFFER_INITIAL_LENGTH_DEFAULT 1024 #define FBUFFER_PTR(fb) ((fb)->ptr) @@ -66,7 +68,7 @@ static void fbuffer_append_long(FBuffer *fb, long number); #endif static inline void fbuffer_append_char(FBuffer *fb, char newchr); #ifdef JSON_GENERATOR -static VALUE fbuffer_to_s(FBuffer *fb); +static VALUE fbuffer_finalize(FBuffer *fb); #endif static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) @@ -86,24 +88,19 @@ static void fbuffer_free(FBuffer *fb) } } -#ifndef JSON_GENERATOR static void fbuffer_clear(FBuffer *fb) { fb->len = 0; } -#endif -static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_flush(FBuffer *fb) { - unsigned long required; - - if (RB_UNLIKELY(!fb->ptr)) { - fb->ptr = ALLOC_N(char, fb->initial_length); - fb->capa = fb->initial_length; - } - - for (required = fb->capa; requested > required - fb->len; required <<= 1); + rb_io_write(fb->io, rb_utf8_str_new(fb->ptr, fb->len)); + fbuffer_clear(fb); +} +static void fbuffer_realloc(FBuffer *fb, unsigned long required) +{ if (required > fb->capa) { if (fb->type == FBUFFER_STACK_ALLOCATED) { const char *old_buffer = fb->ptr; @@ -117,6 +114,32 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) } } +static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) +{ + if (RB_UNLIKELY(fb->io)) { + if (fb->capa < FBUFFER_IO_BUFFER_SIZE) { + fbuffer_realloc(fb, FBUFFER_IO_BUFFER_SIZE); + } else { + fbuffer_flush(fb); + } + + if (RB_LIKELY(requested < fb->capa)) { + return; + } + } + + unsigned long required; + + if (RB_UNLIKELY(!fb->ptr)) { + fb->ptr = ALLOC_N(char, fb->initial_length); + fb->capa = fb->initial_length; + } + + for (required = fb->capa; requested > required - fb->len; required <<= 1); + + fbuffer_realloc(fb, required); +} + static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) { if (RB_UNLIKELY(requested > fb->capa - fb->len)) { @@ -174,11 +197,18 @@ static void fbuffer_append_long(FBuffer *fb, long number) fbuffer_append(fb, buffer_end - len, len); } -static VALUE fbuffer_to_s(FBuffer *fb) +static VALUE fbuffer_finalize(FBuffer *fb) { - VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); - fbuffer_free(fb); - return result; + if (fb->io) { + fbuffer_flush(fb); + fbuffer_free(fb); + rb_io_flush(fb->io); + return fb->io; + } else { + VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); + fbuffer_free(fb); + return result; + } } #endif #endif diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index c4f356ac67..503baca65f 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -54,7 +54,7 @@ struct generate_json_data { }; static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); @@ -453,7 +453,7 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_object); + return cState_partial_generate(Vstate, self, generate_json_object, Qfalse); } /* @@ -467,7 +467,7 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_array); + return cState_partial_generate(Vstate, self, generate_json_array, Qfalse); } #ifdef RUBY_INTEGER_UNIFICATION @@ -480,7 +480,7 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_integer); + return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse); } #else @@ -493,7 +493,7 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_fixnum); + return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse); } /* @@ -505,7 +505,7 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_bignum); + return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse); } #endif @@ -518,7 +518,7 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_float); + return cState_partial_generate(Vstate, self, generate_json_float, Qfalse); } /* @@ -543,7 +543,7 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_string); + return cState_partial_generate(Vstate, self, generate_json_string, Qfalse); } /* @@ -638,7 +638,7 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) rb_scan_args(argc, argv, "01", &state); Check_Type(string, T_STRING); state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string, generate_json_string); + return cState_partial_generate(state, string, generate_json_string, Qfalse); } static void State_mark(void *ptr) @@ -1045,12 +1045,14 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) { GET_STATE(self); char stack_buffer[FBUFFER_STACK_SIZE]; - FBuffer buffer = {0}; + FBuffer buffer = { + .io = RTEST(io) ? io : Qfalse, + }; fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); struct generate_json_data data = { @@ -1062,19 +1064,12 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - return fbuffer_to_s(&buffer); + return fbuffer_finalize(&buffer); } -/* - * call-seq: generate(obj) - * - * Generates a valid JSON document from object +obj+ and returns the - * result. If no valid JSON document can be created this method raises a - * GeneratorError exception. - */ -static VALUE cState_generate(VALUE self, VALUE obj) +static VALUE cState_generate(VALUE self, VALUE obj, VALUE io) { - VALUE result = cState_partial_generate(self, obj, generate_json); + VALUE result = cState_partial_generate(self, obj, generate_json, io); GET_STATE(self); (void)state; return result; @@ -1502,14 +1497,16 @@ static VALUE cState_configure(VALUE self, VALUE opts) return self; } -static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) { JSON_Generator_State state = {0}; state_init(&state); configure_state(&state, opts); char stack_buffer[FBUFFER_STACK_SIZE]; - FBuffer buffer = {0}; + FBuffer buffer = { + .io = RTEST(io) ? io : Qfalse, + }; fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); struct generate_json_data data = { @@ -1521,7 +1518,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - return fbuffer_to_s(&buffer); + return fbuffer_finalize(&buffer); } /* @@ -1583,9 +1580,9 @@ void Init_generator(void) rb_define_method(cState, "depth=", cState_depth_set, 1); rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); - rb_define_method(cState, "generate", cState_generate, 1); + rb_define_private_method(cState, "_generate", cState_generate, 2); - rb_define_singleton_method(cState, "generate", cState_m_generate, 2); + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 3b06202095..a88a3fffa5 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -286,7 +286,7 @@ module JSON if State === opts opts.generate(obj) else - State.generate(obj, opts) + State.generate(obj, opts, nil) end end @@ -801,18 +801,15 @@ module JSON opts = opts.merge(:max_nesting => limit) if limit opts = merge_dump_options(opts, **kwargs) if kwargs - result = begin - generate(obj, opts) + begin + if State === opts + opts.generate(obj, anIO) + else + State.generate(obj, opts, anIO) + end rescue JSON::NestingError raise ArgumentError, "exceed depth limit" end - - if anIO.nil? - result - else - anIO.write result - anIO - end end # Encodes string using String.encode. diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb index 6cd9496e67..1e0d5245b1 100644 --- a/ext/json/lib/json/ext/generator/state.rb +++ b/ext/json/lib/json/ext/generator/state.rb @@ -47,6 +47,17 @@ module JSON alias_method :merge, :configure + # call-seq: + # generate(obj) -> String + # generate(obj, anIO) -> anIO + # + # Generates a valid JSON document from object +obj+ and returns the + # result. If no valid JSON document can be created this method raises a + # GeneratorError exception. + def generate(obj, io = nil) + _generate(obj, io) + end + # call-seq: to_h # # Returns the configuration instance variables as a hash, that can be diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index a5d62337e1..1f157da026 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -162,6 +162,17 @@ class JSONCommonInterfaceTest < Test::Unit::TestCase assert_equal too_deep, dump(obj, strict: false) end + def test_dump_in_io + io = StringIO.new + assert_same io, JSON.dump([1], io) + assert_equal "[1]", io.string + + big_object = ["a" * 10, "b" * 40, { foo: 1.23 }] * 5000 + io.rewind + assert_same io, JSON.dump(big_object, io) + assert_equal JSON.dump(big_object), io.string + end + def test_dump_should_modify_defaults max_nesting = JSON.dump_default_options[:max_nesting] dump([], StringIO.new, 10) |