summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2024-11-05 19:57:46 +0100
committerHiroshi SHIBATA <[email protected]>2024-11-26 15:11:05 +0900
commitee0de3fd4e02f95f42fd3fe9cb18bcfe3e7e2bf1 (patch)
tree4063efad59cae2b049ad2261e86bcffba78aca6e
parent519701657fad62799045f6230f79de1941cc3c76 (diff)
[ruby/json] JSON.dump: write directly into the provided IO
Ref: https://github.com/ruby/json/issues/524 Write directly into the provided IO rather than buffering everything in memory. Unfortunately, Ruby doesn't provide an API to write into an IO without first allocating a string, which is a bit wasteful. https://github.com/ruby/json/commit/f017af6c0a
-rw-r--r--ext/json/fbuffer/fbuffer.h62
-rw-r--r--ext/json/generator/generator.c49
-rw-r--r--ext/json/lib/json/common.rb17
-rw-r--r--ext/json/lib/json/ext/generator/state.rb11
-rw-r--r--test/json/json_common_interface_test.rb11
5 files changed, 98 insertions, 52 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
index 3e154a5fa8..0774c7e464 100644
--- a/ext/json/fbuffer/fbuffer.h
+++ b/ext/json/fbuffer/fbuffer.h
@@ -46,9 +46,11 @@ typedef struct FBufferStruct {
unsigned long len;
unsigned long capa;
char *ptr;
+ VALUE io;
} FBuffer;
#define FBUFFER_STACK_SIZE 512
+#define FBUFFER_IO_BUFFER_SIZE (16384 - 1)
#define FBUFFER_INITIAL_LENGTH_DEFAULT 1024
#define FBUFFER_PTR(fb) ((fb)->ptr)
@@ -66,7 +68,7 @@ static void fbuffer_append_long(FBuffer *fb, long number);
#endif
static inline void fbuffer_append_char(FBuffer *fb, char newchr);
#ifdef JSON_GENERATOR
-static VALUE fbuffer_to_s(FBuffer *fb);
+static VALUE fbuffer_finalize(FBuffer *fb);
#endif
static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size)
@@ -86,24 +88,19 @@ static void fbuffer_free(FBuffer *fb)
}
}
-#ifndef JSON_GENERATOR
static void fbuffer_clear(FBuffer *fb)
{
fb->len = 0;
}
-#endif
-static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
+static void fbuffer_flush(FBuffer *fb)
{
- unsigned long required;
-
- if (RB_UNLIKELY(!fb->ptr)) {
- fb->ptr = ALLOC_N(char, fb->initial_length);
- fb->capa = fb->initial_length;
- }
-
- for (required = fb->capa; requested > required - fb->len; required <<= 1);
+ rb_io_write(fb->io, rb_utf8_str_new(fb->ptr, fb->len));
+ fbuffer_clear(fb);
+}
+static void fbuffer_realloc(FBuffer *fb, unsigned long required)
+{
if (required > fb->capa) {
if (fb->type == FBUFFER_STACK_ALLOCATED) {
const char *old_buffer = fb->ptr;
@@ -117,6 +114,32 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
}
}
+static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
+{
+ if (RB_UNLIKELY(fb->io)) {
+ if (fb->capa < FBUFFER_IO_BUFFER_SIZE) {
+ fbuffer_realloc(fb, FBUFFER_IO_BUFFER_SIZE);
+ } else {
+ fbuffer_flush(fb);
+ }
+
+ if (RB_LIKELY(requested < fb->capa)) {
+ return;
+ }
+ }
+
+ unsigned long required;
+
+ if (RB_UNLIKELY(!fb->ptr)) {
+ fb->ptr = ALLOC_N(char, fb->initial_length);
+ fb->capa = fb->initial_length;
+ }
+
+ for (required = fb->capa; requested > required - fb->len; required <<= 1);
+
+ fbuffer_realloc(fb, required);
+}
+
static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
{
if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
@@ -174,11 +197,18 @@ static void fbuffer_append_long(FBuffer *fb, long number)
fbuffer_append(fb, buffer_end - len, len);
}
-static VALUE fbuffer_to_s(FBuffer *fb)
+static VALUE fbuffer_finalize(FBuffer *fb)
{
- VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
- fbuffer_free(fb);
- return result;
+ if (fb->io) {
+ fbuffer_flush(fb);
+ fbuffer_free(fb);
+ rb_io_flush(fb->io);
+ return fb->io;
+ } else {
+ VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
+ fbuffer_free(fb);
+ return result;
+ }
}
#endif
#endif
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index c4f356ac67..503baca65f 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -54,7 +54,7 @@ struct generate_json_data {
};
static VALUE cState_from_state_s(VALUE self, VALUE opts);
-static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func);
+static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
@@ -453,7 +453,7 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
{
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_object);
+ return cState_partial_generate(Vstate, self, generate_json_object, Qfalse);
}
/*
@@ -467,7 +467,7 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_array);
+ return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
}
#ifdef RUBY_INTEGER_UNIFICATION
@@ -480,7 +480,7 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
{
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_integer);
+ return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse);
}
#else
@@ -493,7 +493,7 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
{
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_fixnum);
+ return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse);
}
/*
@@ -505,7 +505,7 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
{
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_bignum);
+ return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse);
}
#endif
@@ -518,7 +518,7 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
{
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_float);
+ return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
}
/*
@@ -543,7 +543,7 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
{
rb_check_arity(argc, 0, 1);
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_string);
+ return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
}
/*
@@ -638,7 +638,7 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
rb_scan_args(argc, argv, "01", &state);
Check_Type(string, T_STRING);
state = cState_from_state_s(cState, state);
- return cState_partial_generate(state, string, generate_json_string);
+ return cState_partial_generate(state, string, generate_json_string, Qfalse);
}
static void State_mark(void *ptr)
@@ -1045,12 +1045,14 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
return Qundef;
}
-static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func)
+static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
{
GET_STATE(self);
char stack_buffer[FBUFFER_STACK_SIZE];
- FBuffer buffer = {0};
+ FBuffer buffer = {
+ .io = RTEST(io) ? io : Qfalse,
+ };
fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
struct generate_json_data data = {
@@ -1062,19 +1064,12 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func)
};
rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
- return fbuffer_to_s(&buffer);
+ return fbuffer_finalize(&buffer);
}
-/*
- * call-seq: generate(obj)
- *
- * Generates a valid JSON document from object +obj+ and returns the
- * result. If no valid JSON document can be created this method raises a
- * GeneratorError exception.
- */
-static VALUE cState_generate(VALUE self, VALUE obj)
+static VALUE cState_generate(VALUE self, VALUE obj, VALUE io)
{
- VALUE result = cState_partial_generate(self, obj, generate_json);
+ VALUE result = cState_partial_generate(self, obj, generate_json, io);
GET_STATE(self);
(void)state;
return result;
@@ -1502,14 +1497,16 @@ static VALUE cState_configure(VALUE self, VALUE opts)
return self;
}
-static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts)
+static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
{
JSON_Generator_State state = {0};
state_init(&state);
configure_state(&state, opts);
char stack_buffer[FBUFFER_STACK_SIZE];
- FBuffer buffer = {0};
+ FBuffer buffer = {
+ .io = RTEST(io) ? io : Qfalse,
+ };
fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
struct generate_json_data data = {
@@ -1521,7 +1518,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts)
};
rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
- return fbuffer_to_s(&buffer);
+ return fbuffer_finalize(&buffer);
}
/*
@@ -1583,9 +1580,9 @@ void Init_generator(void)
rb_define_method(cState, "depth=", cState_depth_set, 1);
rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
- rb_define_method(cState, "generate", cState_generate, 1);
+ rb_define_private_method(cState, "_generate", cState_generate, 2);
- rb_define_singleton_method(cState, "generate", cState_m_generate, 2);
+ rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb
index 3b06202095..a88a3fffa5 100644
--- a/ext/json/lib/json/common.rb
+++ b/ext/json/lib/json/common.rb
@@ -286,7 +286,7 @@ module JSON
if State === opts
opts.generate(obj)
else
- State.generate(obj, opts)
+ State.generate(obj, opts, nil)
end
end
@@ -801,18 +801,15 @@ module JSON
opts = opts.merge(:max_nesting => limit) if limit
opts = merge_dump_options(opts, **kwargs) if kwargs
- result = begin
- generate(obj, opts)
+ begin
+ if State === opts
+ opts.generate(obj, anIO)
+ else
+ State.generate(obj, opts, anIO)
+ end
rescue JSON::NestingError
raise ArgumentError, "exceed depth limit"
end
-
- if anIO.nil?
- result
- else
- anIO.write result
- anIO
- end
end
# Encodes string using String.encode.
diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb
index 6cd9496e67..1e0d5245b1 100644
--- a/ext/json/lib/json/ext/generator/state.rb
+++ b/ext/json/lib/json/ext/generator/state.rb
@@ -47,6 +47,17 @@ module JSON
alias_method :merge, :configure
+ # call-seq:
+ # generate(obj) -> String
+ # generate(obj, anIO) -> anIO
+ #
+ # Generates a valid JSON document from object +obj+ and returns the
+ # result. If no valid JSON document can be created this method raises a
+ # GeneratorError exception.
+ def generate(obj, io = nil)
+ _generate(obj, io)
+ end
+
# call-seq: to_h
#
# Returns the configuration instance variables as a hash, that can be
diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb
index a5d62337e1..1f157da026 100644
--- a/test/json/json_common_interface_test.rb
+++ b/test/json/json_common_interface_test.rb
@@ -162,6 +162,17 @@ class JSONCommonInterfaceTest < Test::Unit::TestCase
assert_equal too_deep, dump(obj, strict: false)
end
+ def test_dump_in_io
+ io = StringIO.new
+ assert_same io, JSON.dump([1], io)
+ assert_equal "[1]", io.string
+
+ big_object = ["a" * 10, "b" * 40, { foo: 1.23 }] * 5000
+ io.rewind
+ assert_same io, JSON.dump(big_object, io)
+ assert_equal JSON.dump(big_object), io.string
+ end
+
def test_dump_should_modify_defaults
max_nesting = JSON.dump_default_options[:max_nesting]
dump([], StringIO.new, 10)