1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
25 #include "internal/array.h"
26 #include "internal/bignum.h"
27 #include "internal/class.h"
28 #include "internal/encoding.h"
29 #include "internal/error.h"
30 #include "internal/hash.h"
31 #include "internal/numeric.h"
32 #include "internal/object.h"
33 #include "internal/struct.h"
34 #include "internal/symbol.h"
35 #include "internal/util.h"
36 #include "internal/vm.h"
38 #include "ruby/ruby.h"
40 #include "ruby/util.h"
43 #include "ruby/internal/attr/nonstring.h"
45 #define BITSPERSHORT (2*CHAR_BIT)
46 #define SHORTMASK ((1<<BITSPERSHORT)-1)
47 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
49 #if SIZEOF_SHORT == SIZEOF_BDIGIT
50 #define SHORTLEN(x) (x)
53 shortlen(size_t len
, BDIGIT
*ds
)
63 return (len
- 1)*SIZEOF_BDIGIT
/2 + offset
;
65 #define SHORTLEN(x) shortlen((x),d)
68 #define MARSHAL_MAJOR 4
69 #define MARSHAL_MINOR 8
73 #define TYPE_FALSE 'F'
74 #define TYPE_FIXNUM 'i'
76 #define TYPE_EXTENDED 'e'
77 #define TYPE_UCLASS 'C'
78 #define TYPE_OBJECT 'o'
80 #define TYPE_USERDEF 'u'
81 #define TYPE_USRMARSHAL 'U'
82 #define TYPE_FLOAT 'f'
83 #define TYPE_BIGNUM 'l'
84 #define TYPE_STRING '"'
85 #define TYPE_REGEXP '/'
86 #define TYPE_ARRAY '['
88 #define TYPE_HASH_DEF '}'
89 #define TYPE_STRUCT 'S'
90 #define TYPE_MODULE_OLD 'M'
91 #define TYPE_CLASS 'c'
92 #define TYPE_MODULE 'm'
94 #define TYPE_SYMBOL ':'
95 #define TYPE_SYMLINK ';'
100 static ID s_dump
, s_load
, s_mdump
, s_mload
;
101 static ID s_dump_data
, s_load_data
, s_alloc
, s_call
;
102 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
103 static ID s_encoding_short
, s_ruby2_keywords_flag
;
104 #define s_encoding_long rb_id_encoding()
106 #define name_s_dump "_dump"
107 #define name_s_load "_load"
108 #define name_s_mdump "marshal_dump"
109 #define name_s_mload "marshal_load"
110 #define name_s_dump_data "_dump_data"
111 #define name_s_load_data "_load_data"
112 #define name_s_alloc "_alloc"
113 #define name_s_call "call"
114 #define name_s_getbyte "getbyte"
115 #define name_s_read "read"
116 #define name_s_write "write"
117 #define name_s_binmode "binmode"
118 #define name_s_encoding_short "E"
119 #define name_s_encoding_long "encoding"
120 #define name_s_ruby2_keywords_flag "K"
125 VALUE (*dumper
)(VALUE
);
126 VALUE (*loader
)(VALUE
, VALUE
);
129 static st_table
*compat_allocator_tbl
;
130 static VALUE compat_allocator_tbl_wrapper
;
131 static VALUE
rb_marshal_dump_limited(VALUE obj
, VALUE port
, int limit
);
132 static VALUE
rb_marshal_load_with_proc(VALUE port
, VALUE proc
, bool freeze
);
134 static st_table
*compat_allocator_table(void);
137 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
139 marshal_compat_t
*compat
;
140 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
143 rb_raise(rb_eTypeError
, "no allocator");
146 compat_allocator_table();
147 compat
= ALLOC(marshal_compat_t
);
148 RB_OBJ_WRITE(compat_allocator_tbl_wrapper
, &compat
->newclass
, newclass
);
149 RB_OBJ_WRITE(compat_allocator_tbl_wrapper
, &compat
->oldclass
, oldclass
);
150 compat
->dumper
= dumper
;
151 compat
->loader
= loader
;
153 st_insert(compat_allocator_table(), (st_data_t
)allocator
, (st_data_t
)compat
);
160 st_table
*compat_tbl
;
163 st_index_t num_entries
;
166 struct dump_call_arg
{
168 struct dump_arg
*arg
;
173 check_dump_arg(VALUE ret
, struct dump_arg
*arg
, const char *name
)
176 rb_raise(rb_eRuntimeError
, "Marshal.dump reentered at %s",
183 check_userdump_arg(VALUE obj
, ID sym
, int argc
, const VALUE
*argv
,
184 struct dump_arg
*arg
, const char *name
)
186 VALUE ret
= rb_funcallv(obj
, sym
, argc
, argv
);
187 VALUE klass
= CLASS_OF(obj
);
188 if (CLASS_OF(ret
) == klass
) {
189 rb_raise(rb_eRuntimeError
, "%"PRIsVALUE
"#%s returned same class instance",
192 return check_dump_arg(ret
, arg
, name
);
195 #define dump_funcall(arg, obj, sym, argc, argv) \
196 check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
197 #define dump_check_funcall(arg, obj, sym, argc, argv) \
198 check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
200 static void clear_dump_arg(struct dump_arg
*arg
);
203 mark_dump_arg(void *ptr
)
205 struct dump_arg
*p
= ptr
;
208 rb_mark_set(p
->symbols
);
209 rb_mark_set(p
->data
);
210 rb_mark_hash(p
->compat_tbl
);
211 rb_mark_set(p
->userdefs
);
216 free_dump_arg(void *ptr
)
222 memsize_dump_arg(const void *ptr
)
224 const struct dump_arg
*p
= (struct dump_arg
*)ptr
;
226 if (p
->symbols
) memsize
+= rb_st_memsize(p
->symbols
);
227 if (p
->data
) memsize
+= rb_st_memsize(p
->data
);
228 if (p
->compat_tbl
) memsize
+= rb_st_memsize(p
->compat_tbl
);
229 if (p
->userdefs
) memsize
+= rb_st_memsize(p
->userdefs
);
230 if (p
->encodings
) memsize
+= rb_st_memsize(p
->encodings
);
234 static const rb_data_type_t dump_arg_data
= {
236 {mark_dump_arg
, free_dump_arg
, memsize_dump_arg
,},
237 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
| RUBY_TYPED_EMBEDDABLE
241 must_not_be_anonymous(const char *type
, VALUE path
)
243 char *n
= RSTRING_PTR(path
);
245 if (!rb_enc_asciicompat(rb_enc_get(path
))) {
247 rb_raise(rb_eTypeError
, "can't dump non-ascii %s name % "PRIsVALUE
,
251 rb_raise(rb_eTypeError
, "can't dump anonymous %s % "PRIsVALUE
,
258 class2path(VALUE klass
)
260 VALUE path
= rb_class_path(klass
);
262 must_not_be_anonymous((RB_TYPE_P(klass
, T_CLASS
) ? "class" : "module"), path
);
263 if (rb_path_to_class(path
) != rb_class_real(klass
)) {
264 rb_raise(rb_eTypeError
, "% "PRIsVALUE
" can't be referred to", path
);
269 int ruby_marshal_write_long(long x
, char *buf
);
270 static void w_long(long, struct dump_arg
*);
271 static int w_encoding(VALUE encname
, struct dump_call_arg
*arg
);
272 static VALUE
encoding_name(VALUE obj
, struct dump_arg
*arg
);
275 w_nbyte(const char *s
, long n
, struct dump_arg
*arg
)
277 VALUE buf
= arg
->str
;
278 rb_str_buf_cat(buf
, s
, n
);
279 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
280 rb_io_write(arg
->dest
, buf
);
281 rb_str_resize(buf
, 0);
286 w_byte(char c
, struct dump_arg
*arg
)
292 w_bytes(const char *s
, long n
, struct dump_arg
*arg
)
298 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
/*
 * Writes the low 16 bits of x as two bytes through w_byte():
 * bits 0-7 first, then bits 8-15.
 */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low = (char)(x & 0xff);
    const char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
308 w_long(long x
, struct dump_arg
*arg
)
310 char buf
[sizeof(long)+1];
311 int i
= ruby_marshal_write_long(x
, buf
);
313 rb_raise(rb_eTypeError
, "long too big to dump");
315 w_nbyte(buf
, i
, arg
);
319 ruby_marshal_write_long(long x
, char *buf
)
324 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
325 /* big long does not fit in 4 bytes */
334 if (0 < x
&& x
< 123) {
335 buf
[0] = (char)(x
+ 5);
338 if (-124 < x
&& x
< 0) {
339 buf
[0] = (char)((x
- 5)&0xff);
342 for (i
=1;i
<(int)sizeof(long)+1;i
++) {
343 buf
[i
] = (char)(x
& 0xff);
358 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
360 #if DBL_MANT_DIG > 32
362 #elif DBL_MANT_DIG > 24
364 #elif DBL_MANT_DIG > 16
371 load_mantissa(double d
, const char *buf
, long len
)
374 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
375 int e
, s
= d
< 0, dig
= 0;
378 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
382 default: m
= *buf
++ & 0xff; /* fall through */
384 case 3: m
= (m
<< 8) | (*buf
++ & 0xff); /* fall through */
387 case 2: m
= (m
<< 8) | (*buf
++ & 0xff); /* fall through */
390 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
393 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
394 d
+= ldexp((double)m
, dig
);
395 } while ((len
-= MANT_BITS
/ 8) > 0);
396 d
= ldexp(d
, e
- DECIMAL_MANT
);
402 #define load_mantissa(d, buf, len) (d)
406 #define FLOAT_DIG (DBL_DIG+2)
412 w_float(double d
, struct dump_arg
*arg
)
414 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
417 if (d
< 0) w_cstr("-inf", arg
);
418 else w_cstr("inf", arg
);
424 if (signbit(d
)) w_cstr("-0", arg
);
425 else w_cstr("0", arg
);
428 int decpt
, sign
, digs
, len
= 0;
429 char *e
, *p
= ruby_dtoa(d
, 0, 0, &decpt
, &sign
, &e
);
430 if (sign
) buf
[len
++] = '-';
432 if (decpt
< -3 || decpt
> digs
) {
434 if (--digs
> 0) buf
[len
++] = '.';
435 memcpy(buf
+ len
, p
+ 1, digs
);
437 len
+= snprintf(buf
+ len
, sizeof(buf
) - len
, "e%d", decpt
- 1);
439 else if (decpt
> 0) {
440 memcpy(buf
+ len
, p
, decpt
);
442 if ((digs
-= decpt
) > 0) {
444 memcpy(buf
+ len
, p
+ decpt
, digs
);
452 memset(buf
+ len
, '0', -decpt
);
455 memcpy(buf
+ len
, p
, digs
);
459 w_bytes(buf
, len
, arg
);
464 w_symbol(VALUE sym
, struct dump_arg
*arg
)
469 if (st_lookup(arg
->symbols
, sym
, &num
)) {
470 w_byte(TYPE_SYMLINK
, arg
);
471 w_long((long)num
, arg
);
474 const VALUE orig_sym
= sym
;
475 sym
= rb_sym2str(sym
);
477 rb_raise(rb_eTypeError
, "can't dump anonymous ID %"PRIdVALUE
, sym
);
479 encname
= encoding_name(sym
, arg
);
480 if (NIL_P(encname
) ||
481 is_ascii_string(sym
)) {
485 w_byte(TYPE_IVAR
, arg
);
487 w_byte(TYPE_SYMBOL
, arg
);
488 w_bytes(RSTRING_PTR(sym
), RSTRING_LEN(sym
), arg
);
489 st_add_direct(arg
->symbols
, orig_sym
, arg
->symbols
->num_entries
);
490 if (!NIL_P(encname
)) {
491 struct dump_call_arg c_arg
;
495 w_encoding(encname
, &c_arg
);
501 w_unique(VALUE s
, struct dump_arg
*arg
)
503 must_not_be_anonymous("class", s
);
504 w_symbol(rb_str_intern(s
), arg
);
507 static void w_object(VALUE
,struct dump_arg
*,int);
510 hash_each(VALUE key
, VALUE value
, VALUE v
)
512 struct dump_call_arg
*arg
= (void *)v
;
513 w_object(key
, arg
->arg
, arg
->limit
);
514 w_object(value
, arg
->arg
, arg
->limit
);
518 #define SINGLETON_DUMP_UNABLE_P(klass) \
519 (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
520 rb_ivar_count(klass) > 0)
523 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
525 if (check
&& RCLASS_SINGLETON_P(klass
)) {
526 VALUE origin
= RCLASS_ORIGIN(klass
);
527 if (SINGLETON_DUMP_UNABLE_P(klass
) ||
528 (origin
!= klass
&& SINGLETON_DUMP_UNABLE_P(origin
))) {
529 rb_raise(rb_eTypeError
, "singleton can't be dumped");
531 klass
= RCLASS_SUPER(klass
);
533 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
534 if (!FL_TEST(klass
, RICLASS_IS_ORIGIN
) ||
535 BUILTIN_TYPE(RBASIC(klass
)->klass
) != T_MODULE
) {
536 VALUE path
= rb_class_name(RBASIC(klass
)->klass
);
537 w_byte(TYPE_EXTENDED
, arg
);
540 klass
= RCLASS_SUPER(klass
);
545 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
551 if (arg
->compat_tbl
&&
552 st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
553 obj
= (VALUE
)real_obj
;
555 klass
= CLASS_OF(obj
);
556 w_extended(klass
, arg
, check
);
558 path
= class2path(rb_class_real(klass
));
563 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
565 VALUE klass
= CLASS_OF(obj
);
567 w_extended(klass
, arg
, TRUE
);
568 klass
= rb_class_real(klass
);
569 if (klass
!= super
) {
570 w_byte(TYPE_UCLASS
, arg
);
571 w_unique(class2path(klass
), arg
);
576 rb_hash_ruby2_keywords_p(VALUE obj
)
578 return (RHASH(obj
)->basic
.flags
& RHASH_PASS_AS_KEYWORDS
) != 0;
582 rb_hash_ruby2_keywords(VALUE obj
)
584 RHASH(obj
)->basic
.flags
|= RHASH_PASS_AS_KEYWORDS
;
588 * If the instance variable name `id` is a special name to be skipped,
589 * returns that special name. Otherwise, if `id` cannot be dumped (it is
590 * unnamed), returns `name` as-is. Returns NULL for an ID that can be dumped.
592 static inline const char *
593 skipping_ivar_name(const ID id
, const char *name
)
595 #define IS_SKIPPED_IVAR(idname) \
596 ((id == idname) && (name = name_##idname, true))
597 if (IS_SKIPPED_IVAR(s_encoding_short
)) return name
;
598 if (IS_SKIPPED_IVAR(s_ruby2_keywords_flag
)) return name
;
599 if (IS_SKIPPED_IVAR(s_encoding_long
)) return name
;
600 if (!rb_id2str(id
)) return name
;
605 struct dump_call_arg
*dump
;
610 w_obj_each(ID id
, VALUE value
, st_data_t a
)
612 struct w_ivar_arg
*ivarg
= (struct w_ivar_arg
*)a
;
613 struct dump_call_arg
*arg
= ivarg
->dump
;
614 const char unnamed
[] = "", *ivname
= skipping_ivar_name(id
, unnamed
);
617 if (ivname
!= unnamed
) {
618 rb_warn("instance variable '%s' on class %"PRIsVALUE
" is not dumped",
619 ivname
, CLASS_OF(arg
->obj
));
624 w_symbol(ID2SYM(id
), arg
->arg
);
625 w_object(value
, arg
->arg
, arg
->limit
);
630 obj_count_ivars(ID id
, VALUE val
, st_data_t a
)
632 if (!skipping_ivar_name(id
, "") && UNLIKELY(!++*(st_index_t
*)a
)) {
633 rb_raise(rb_eRuntimeError
, "too many instance variables");
639 encoding_name(VALUE obj
, struct dump_arg
*arg
)
641 if (rb_enc_capable(obj
)) {
642 int encidx
= rb_enc_get_index(obj
);
643 rb_encoding
*enc
= 0;
646 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
650 /* special treatment for US-ASCII and UTF-8 */
651 if (encidx
== rb_usascii_encindex()) {
654 else if (encidx
== rb_utf8_encindex()) {
659 !st_lookup(arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
) :
660 (arg
->encodings
= st_init_strcasetable(), 1)) {
661 name
= (st_data_t
)rb_str_new_cstr(rb_enc_name(enc
));
662 st_insert(arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
672 w_encoding(VALUE encname
, struct dump_call_arg
*arg
)
674 int limit
= arg
->limit
;
675 if (limit
>= 0) ++limit
;
679 w_symbol(ID2SYM(s_encoding_short
), arg
->arg
);
680 w_object(encname
, arg
->arg
, limit
);
685 w_symbol(ID2SYM(rb_id_encoding()), arg
->arg
);
686 w_object(encname
, arg
->arg
, limit
);
691 has_ivars(VALUE obj
, VALUE encname
, VALUE
*ivobj
)
693 st_index_t num
= !NIL_P(encname
);
695 if (SPECIAL_CONST_P(obj
)) goto generic
;
696 switch (BUILTIN_TYPE(obj
)) {
700 break; /* counted elsewhere */
702 if (rb_hash_ruby2_keywords_p(obj
)) ++num
;
706 rb_ivar_foreach(obj
, obj_count_ivars
, (st_data_t
)&num
);
707 if (num
) *ivobj
= obj
;
714 w_ivar_each(VALUE obj
, st_index_t num
, struct dump_call_arg
*arg
)
716 shape_id_t shape_id
= rb_shape_get_shape_id(arg
->obj
);
717 struct w_ivar_arg ivarg
= {arg
, num
};
719 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)&ivarg
);
721 if (shape_id
!= rb_shape_get_shape_id(arg
->obj
)) {
722 rb_shape_t
* expected_shape
= rb_shape_get_shape_by_id(shape_id
);
723 rb_shape_t
* actual_shape
= rb_shape_get_shape(arg
->obj
);
725 // If the shape tree got _shorter_ then we probably removed an IV
726 // If the shape tree got longer, then we probably added an IV.
727 // The exception message might not be accurate when someone adds and
728 // removes the same number of IVs, but they will still get an exception
729 if (rb_shape_depth(expected_shape
) > rb_shape_depth(actual_shape
)) {
730 rb_raise(rb_eRuntimeError
, "instance variable removed from %"PRIsVALUE
" instance",
734 rb_raise(rb_eRuntimeError
, "instance variable added to %"PRIsVALUE
" instance",
741 w_ivar(st_index_t num
, VALUE ivobj
, VALUE encname
, struct dump_call_arg
*arg
)
743 w_long(num
, arg
->arg
);
744 num
-= w_encoding(encname
, arg
);
745 if (RB_TYPE_P(ivobj
, T_HASH
) && rb_hash_ruby2_keywords_p(ivobj
)) {
746 int limit
= arg
->limit
;
747 if (limit
>= 0) ++limit
;
748 w_symbol(ID2SYM(s_ruby2_keywords_flag
), arg
->arg
);
749 w_object(Qtrue
, arg
->arg
, limit
);
752 if (!UNDEF_P(ivobj
) && num
) {
753 w_ivar_each(ivobj
, num
, arg
);
758 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
762 rb_ivar_foreach(obj
, obj_count_ivars
, (st_data_t
)&num
);
763 w_long(num
, arg
->arg
);
764 w_ivar_each(obj
, num
, arg
);
768 // Optimized dump for fixnum larger than 31-bits
770 w_bigfixnum(VALUE obj
, struct dump_arg
*arg
)
772 RUBY_ASSERT(FIXNUM_P(obj
));
774 w_byte(TYPE_BIGNUM
, arg
);
776 #if SIZEOF_LONG == SIZEOF_VALUE
780 long long num
, slen_num
;
784 char sign
= num
< 0 ? '-' : '+';
787 // Guaranteed not to overflow, as FIXNUM is 1-bit less than long
788 if (num
< 0) num
= -num
;
790 // calculate the size in shorts
796 slen_num
= SHORTDN(slen_num
);
800 RUBY_ASSERT(slen
> 0 && slen
<= SIZEOF_LONG
/ 2);
802 w_long((long)slen
, arg
);
804 for (int i
= 0; i
< slen
; i
++) {
805 w_short(num
& SHORTMASK
, arg
);
809 // We aren't adding this object to the link table, but we need to increment
813 RUBY_ASSERT(num
== 0);
818 w_remember(VALUE obj
, struct dump_arg
*arg
)
820 st_add_direct(arg
->data
, obj
, arg
->num_entries
++);
824 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
826 struct dump_call_arg c_arg
;
827 VALUE ivobj
= Qundef
;
829 st_index_t hasiv
= 0;
830 VALUE encname
= Qnil
;
833 rb_raise(rb_eArgError
, "exceed depth limit");
837 w_byte(TYPE_NIL
, arg
);
839 else if (obj
== Qtrue
) {
840 w_byte(TYPE_TRUE
, arg
);
842 else if (obj
== Qfalse
) {
843 w_byte(TYPE_FALSE
, arg
);
845 else if (FIXNUM_P(obj
)) {
847 w_byte(TYPE_FIXNUM
, arg
);
848 w_long(FIX2INT(obj
), arg
);
850 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
851 w_byte(TYPE_FIXNUM
, arg
);
852 w_long(FIX2LONG(obj
), arg
);
855 w_bigfixnum(obj
, arg
);
859 else if (SYMBOL_P(obj
)) {
863 if (st_lookup(arg
->data
, obj
, &num
)) {
864 w_byte(TYPE_LINK
, arg
);
865 w_long((long)num
, arg
);
869 if (limit
> 0) limit
--;
875 w_remember(obj
, arg
);
876 w_byte(TYPE_FLOAT
, arg
);
877 w_float(RFLOAT_VALUE(obj
), arg
);
883 if (!RBASIC_CLASS(obj
)) {
884 rb_raise(rb_eTypeError
, "can't dump internal %s",
885 rb_builtin_type_name(BUILTIN_TYPE(obj
)));
888 if (rb_obj_respond_to(obj
, s_mdump
, TRUE
)) {
889 w_remember(obj
, arg
);
891 v
= dump_funcall(arg
, obj
, s_mdump
, 0, 0);
892 w_class(TYPE_USRMARSHAL
, obj
, arg
, FALSE
);
893 w_object(v
, arg
, limit
);
896 if (rb_obj_respond_to(obj
, s_dump
, TRUE
)) {
897 VALUE ivobj2
= Qundef
;
901 if (arg
->userdefs
&& st_is_member(arg
->userdefs
, (st_data_t
)obj
)) {
902 rb_raise(rb_eRuntimeError
, "can't dump recursive object using _dump()");
905 v
= dump_funcall(arg
, obj
, s_dump
, 1, &v
);
906 if (!RB_TYPE_P(v
, T_STRING
)) {
907 rb_raise(rb_eTypeError
, "_dump() must return string");
909 hasiv
= has_ivars(obj
, (encname
= encoding_name(obj
, arg
)), &ivobj
);
910 hasiv2
= has_ivars(v
, (encname2
= encoding_name(v
, arg
)), &ivobj2
);
916 if (hasiv
) w_byte(TYPE_IVAR
, arg
);
917 w_class(TYPE_USERDEF
, obj
, arg
, FALSE
);
918 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
920 st_data_t userdefs
= (st_data_t
)obj
;
921 if (!arg
->userdefs
) {
922 arg
->userdefs
= rb_init_identtable();
924 st_add_direct(arg
->userdefs
, userdefs
, 0);
925 w_ivar(hasiv
, ivobj
, encname
, &c_arg
);
926 st_delete(arg
->userdefs
, &userdefs
, NULL
);
928 w_remember(obj
, arg
);
932 w_remember(obj
, arg
);
934 hasiv
= has_ivars(obj
, (encname
= encoding_name(obj
, arg
)), &ivobj
);
936 st_data_t compat_data
;
937 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
938 if (st_lookup(compat_allocator_tbl
,
939 (st_data_t
)allocator
,
941 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
942 VALUE real_obj
= obj
;
943 obj
= compat
->dumper(real_obj
);
944 if (!arg
->compat_tbl
) {
945 arg
->compat_tbl
= rb_init_identtable();
947 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
948 if (obj
!= real_obj
&& UNDEF_P(ivobj
)) hasiv
= 0;
951 if (hasiv
) w_byte(TYPE_IVAR
, arg
);
953 switch (BUILTIN_TYPE(obj
)) {
955 if (FL_TEST(obj
, FL_SINGLETON
)) {
956 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
958 w_byte(TYPE_CLASS
, arg
);
960 VALUE path
= class2path(obj
);
961 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
967 w_byte(TYPE_MODULE
, arg
);
969 VALUE path
= class2path(obj
);
970 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
976 w_byte(TYPE_FLOAT
, arg
);
977 w_float(RFLOAT_VALUE(obj
), arg
);
981 w_byte(TYPE_BIGNUM
, arg
);
983 char sign
= BIGNUM_SIGN(obj
) ? '+' : '-';
984 size_t len
= BIGNUM_LEN(obj
);
987 BDIGIT
*d
= BIGNUM_DIGITS(obj
);
989 slen
= SHORTLEN(len
);
990 if (LONG_MAX
< slen
) {
991 rb_raise(rb_eTypeError
, "too big Bignum can't be dumped");
995 w_long((long)slen
, arg
);
996 for (j
= 0; j
< len
; j
++) {
997 #if SIZEOF_BDIGIT > SIZEOF_SHORT
1001 for (i
=0; i
<SIZEOF_BDIGIT
; i
+=SIZEOF_SHORT
) {
1002 w_short(num
& SHORTMASK
, arg
);
1004 if (j
== len
- 1 && num
== 0) break;
1015 w_uclass(obj
, rb_cString
, arg
);
1016 w_byte(TYPE_STRING
, arg
);
1017 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
1021 w_uclass(obj
, rb_cRegexp
, arg
);
1022 w_byte(TYPE_REGEXP
, arg
);
1024 int opts
= rb_reg_options(obj
);
1025 w_bytes(RREGEXP_SRC_PTR(obj
), RREGEXP_SRC_LEN(obj
), arg
);
1026 w_byte((char)opts
, arg
);
1031 w_uclass(obj
, rb_cArray
, arg
);
1032 w_byte(TYPE_ARRAY
, arg
);
1034 long i
, len
= RARRAY_LEN(obj
);
1037 for (i
=0; i
<RARRAY_LEN(obj
); i
++) {
1038 w_object(RARRAY_AREF(obj
, i
), arg
, limit
);
1039 if (len
!= RARRAY_LEN(obj
)) {
1040 rb_raise(rb_eRuntimeError
, "array modified during dump");
1047 w_uclass(obj
, rb_cHash
, arg
);
1048 if (rb_hash_compare_by_id_p(obj
)) {
1049 w_byte(TYPE_UCLASS
, arg
);
1050 w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg
);
1052 if (NIL_P(RHASH_IFNONE(obj
))) {
1053 w_byte(TYPE_HASH
, arg
);
1055 else if (FL_TEST(obj
, RHASH_PROC_DEFAULT
)) {
1056 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
1059 w_byte(TYPE_HASH_DEF
, arg
);
1061 w_long(rb_hash_size_num(obj
), arg
);
1062 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
1063 if (!NIL_P(RHASH_IFNONE(obj
))) {
1064 w_object(RHASH_IFNONE(obj
), arg
, limit
);
1069 w_class(TYPE_STRUCT
, obj
, arg
, TRUE
);
1071 long len
= RSTRUCT_LEN(obj
);
1076 mem
= rb_struct_members(obj
);
1077 for (i
=0; i
<len
; i
++) {
1078 w_symbol(RARRAY_AREF(mem
, i
), arg
);
1079 w_object(RSTRUCT_GET(obj
, i
), arg
, limit
);
1085 w_class(TYPE_OBJECT
, obj
, arg
, TRUE
);
1086 w_objivar(obj
, &c_arg
);
1093 if (!rb_obj_respond_to(obj
, s_dump_data
, TRUE
)) {
1094 rb_raise(rb_eTypeError
,
1095 "no _dump_data is defined for class %"PRIsVALUE
,
1098 v
= dump_funcall(arg
, obj
, s_dump_data
, 0, 0);
1099 w_class(TYPE_DATA
, obj
, arg
, TRUE
);
1100 w_object(v
, arg
, limit
);
1105 rb_raise(rb_eTypeError
, "can't dump %"PRIsVALUE
,
1112 w_ivar(hasiv
, ivobj
, encname
, &c_arg
);
1117 clear_dump_arg(struct dump_arg
*arg
)
1119 if (!arg
->symbols
) return;
1120 st_free_table(arg
->symbols
);
1122 st_free_table(arg
->data
);
1124 arg
->num_entries
= 0;
1125 if (arg
->compat_tbl
) {
1126 st_free_table(arg
->compat_tbl
);
1127 arg
->compat_tbl
= 0;
1129 if (arg
->encodings
) {
1130 st_free_table(arg
->encodings
);
1133 if (arg
->userdefs
) {
1134 st_free_table(arg
->userdefs
);
1139 NORETURN(static inline void io_needed(void));
1143 rb_raise(rb_eTypeError
, "instance of IO needed");
1148 * dump( obj [, anIO] , limit=-1 ) -> anIO
1150 * Serializes obj and all descendant objects. If anIO is
1151 * specified, the serialized data will be written to it, otherwise the
1152 * data will be returned as a String. If limit is specified, the
1153 * traversal of subobjects will be limited to that depth. If limit is
1154 * negative, no checking of depth will be performed.
1157 * def initialize(str)
1165 * (produces no output)
1167 * o = Klass.new("hello\n")
1168 * data = Marshal.dump(o)
1169 * obj = Marshal.load(data)
1170 * obj.say_hello #=> "hello\n"
1172 * Marshal can't dump the following objects:
1173 * * anonymous Class/Module.
1174 * * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
1176 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1177 * ThreadGroup, Continuation
1178 * * objects which define singleton methods
1181 marshal_dump(int argc
, VALUE
*argv
, VALUE _
)
1183 VALUE obj
, port
, a1
, a2
;
1187 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
1189 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
1190 if (NIL_P(a1
)) io_needed();
1193 else if (argc
== 2) {
1194 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
1195 else if (NIL_P(a1
)) io_needed();
1198 return rb_marshal_dump_limited(obj
, port
, limit
);
1202 rb_marshal_dump_limited(VALUE obj
, VALUE port
, int limit
)
1204 struct dump_arg
*arg
;
1205 VALUE wrapper
; /* used to avoid memory leak in case of exception */
1207 wrapper
= TypedData_Make_Struct(0, struct dump_arg
, &dump_arg_data
, arg
);
1209 arg
->symbols
= st_init_numtable();
1210 arg
->data
= rb_init_identtable();
1211 arg
->num_entries
= 0;
1212 arg
->compat_tbl
= 0;
1215 arg
->str
= rb_str_buf_new(0);
1217 if (!rb_respond_to(port
, s_write
)) {
1221 dump_check_funcall(arg
, port
, s_binmode
, 0, 0);
1227 w_byte(MARSHAL_MAJOR
, arg
);
1228 w_byte(MARSHAL_MINOR
, arg
);
1230 w_object(obj
, arg
, limit
);
1232 rb_io_write(arg
->dest
, arg
->str
);
1233 rb_str_resize(arg
->str
, 0);
1235 clear_dump_arg(arg
);
1236 RB_GC_GUARD(wrapper
);
1249 st_table
*partial_objects
;
1251 st_table
*compat_tbl
;
1256 check_load_arg(VALUE ret
, struct load_arg
*arg
, const char *name
)
1258 if (!arg
->symbols
) {
1259 rb_raise(rb_eRuntimeError
, "Marshal.load reentered at %s",
1264 #define load_funcall(arg, obj, sym, argc, argv) \
1265 check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1267 static void clear_load_arg(struct load_arg
*arg
);
1270 mark_load_arg(void *ptr
)
1272 struct load_arg
*p
= ptr
;
1275 rb_mark_tbl(p
->symbols
);
1276 rb_mark_tbl(p
->data
);
1277 rb_mark_tbl(p
->partial_objects
);
1278 rb_mark_hash(p
->compat_tbl
);
/*
 * Free callback listed in load_arg_data: forwards ptr to
 * clear_load_arg().
 */
static void
free_load_arg(void *ptr)
{
    struct load_arg *const loader = ptr;

    clear_load_arg(loader);
}
1288 memsize_load_arg(const void *ptr
)
1290 const struct load_arg
*p
= (struct load_arg
*)ptr
;
1292 if (p
->symbols
) memsize
+= rb_st_memsize(p
->symbols
);
1293 if (p
->data
) memsize
+= rb_st_memsize(p
->data
);
1294 if (p
->partial_objects
) memsize
+= rb_st_memsize(p
->partial_objects
);
1295 if (p
->compat_tbl
) memsize
+= rb_st_memsize(p
->compat_tbl
);
1299 static const rb_data_type_t load_arg_data
= {
1301 {mark_load_arg
, free_load_arg
, memsize_load_arg
,},
1302 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
| RUBY_TYPED_EMBEDDABLE
1305 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1306 static VALUE
r_object(struct load_arg
*arg
);
1307 static VALUE
r_symbol(struct load_arg
*arg
);
1309 NORETURN(static void too_short(void));
1313 rb_raise(rb_eArgError
, "marshal data too short");
1317 r_prepare(struct load_arg
*arg
)
1319 st_index_t idx
= arg
->data
->num_entries
;
1321 st_insert(arg
->data
, (st_data_t
)idx
, (st_data_t
)Qundef
);
1325 static unsigned char
1326 r_byte1_buffered(struct load_arg
*arg
)
1328 if (arg
->buflen
== 0) {
1329 long readable
= arg
->readable
< BUFSIZ
? arg
->readable
: BUFSIZ
;
1330 VALUE str
, n
= LONG2NUM(readable
);
1332 str
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1333 if (NIL_P(str
)) too_short();
1335 memcpy(arg
->buf
, RSTRING_PTR(str
), RSTRING_LEN(str
));
1337 arg
->buflen
= RSTRING_LEN(str
);
1340 return arg
->buf
[arg
->offset
++];
1344 r_byte(struct load_arg
*arg
)
1348 if (RB_TYPE_P(arg
->src
, T_STRING
)) {
1349 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
1350 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
1357 if (arg
->readable
>0 || arg
->buflen
> 0) {
1358 c
= r_byte1_buffered(arg
);
1361 VALUE v
= load_funcall(arg
, arg
->src
, s_getbyte
, 0, 0);
1362 if (NIL_P(v
)) rb_eof_error();
1363 c
= (unsigned char)NUM2CHR(v
);
1369 NORETURN(static void long_toobig(int size
));
1372 long_toobig(int size
)
1374 rb_raise(rb_eTypeError
, "long too big for this architecture (size "
1375 STRINGIZE(SIZEOF_LONG
)", given %d)", size
);
1379 r_long(struct load_arg
*arg
)
1382 int c
= (signed char)r_byte(arg
);
1385 if (c
== 0) return 0;
1387 if (4 < c
&& c
< 128) {
1390 if (c
> (int)sizeof(long)) long_toobig(c
);
1393 x
|= (long)r_byte(arg
) << (8*i
);
1397 if (-129 < c
&& c
< -4) {
1401 if (c
> (int)sizeof(long)) long_toobig(c
);
1404 x
&= ~((long)0xff << (8*i
));
1405 x
|= (long)r_byte(arg
) << (8*i
);
1412 ruby_marshal_read_long(const char **buf
, long len
)
1416 struct load_arg arg
;
1417 memset(&arg
, 0, sizeof(arg
));
1418 arg
.src
= rb_setup_fake_str(&src
, *buf
, len
, 0);
1425 r_bytes1(long len
, struct load_arg
*arg
)
1427 VALUE str
, n
= LONG2NUM(len
);
1429 str
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1430 if (NIL_P(str
)) too_short();
1432 if (RSTRING_LEN(str
) != len
) too_short();
1438 r_bytes1_buffered(long len
, struct load_arg
*arg
)
1442 if (len
<= arg
->buflen
) {
1443 str
= rb_str_new(arg
->buf
+arg
->offset
, len
);
1448 long buflen
= arg
->buflen
;
1449 long readable
= arg
->readable
+ 1;
1450 long tmp_len
, read_len
, need_len
= len
- buflen
;
1453 readable
= readable
< BUFSIZ
? readable
: BUFSIZ
;
1454 read_len
= need_len
> readable
? need_len
: readable
;
1455 n
= LONG2NUM(read_len
);
1456 tmp
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1457 if (NIL_P(tmp
)) too_short();
1460 tmp_len
= RSTRING_LEN(tmp
);
1462 if (tmp_len
< need_len
) too_short();
1464 str
= rb_str_new(arg
->buf
+arg
->offset
, buflen
);
1465 rb_str_cat(str
, RSTRING_PTR(tmp
), need_len
);
1467 if (tmp_len
> need_len
) {
1468 buflen
= tmp_len
- need_len
;
1469 memcpy(arg
->buf
, RSTRING_PTR(tmp
)+need_len
, buflen
);
1470 arg
->buflen
= buflen
;
1481 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1484 r_bytes0(long len
, struct load_arg
*arg
)
1488 if (len
== 0) return rb_str_new(0, 0);
1489 if (RB_TYPE_P(arg
->src
, T_STRING
)) {
1490 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
1491 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
1499 if (arg
->readable
> 0 || arg
->buflen
> 0) {
1500 str
= r_bytes1_buffered(len
, arg
);
1503 str
= r_bytes1(len
, arg
);
/*
 * Compares the l bytes at p with the nlen bytes at name.
 *
 * Returns 1 when both lengths match and the bytes are identical,
 * otherwise 0.  Neither buffer needs to be NUL-terminated; only the
 * first nlen bytes of each are read, and only when the lengths agree.
 *
 * A negative l never matches (it converts to a huge size_t).  Two empty
 * spans compare equal without touching either buffer; the earlier form
 * dereferenced both pointers and then passed nlen-1 (wrapping to
 * SIZE_MAX) to memcmp() in that case.
 */
static int
name_equal(const char *name, size_t nlen, const char *p, long l)
{
    if ((size_t)l != nlen) return 0;
    if (nlen == 0) return 1;
    return memcmp(p, name, nlen) == 0;
}
1517 sym2encidx(VALUE sym
, VALUE val
)
1519 RBIMPL_ATTR_NONSTRING() static const char name_encoding
[8] = "encoding";
1522 if (rb_enc_get_index(sym
) != ENCINDEX_US_ASCII
) return -1;
1523 RSTRING_GETMEM(sym
, p
, l
);
1524 if (l
<= 0) return -1;
1525 if (name_equal(name_encoding
, sizeof(name_encoding
), p
, l
)) {
1526 int idx
= rb_enc_find_index(StringValueCStr(val
));
1529 if (name_equal(name_s_encoding_short
, rb_strlen_lit(name_s_encoding_short
), p
, l
)) {
1530 if (val
== Qfalse
) return rb_usascii_encindex();
1531 else if (val
== Qtrue
) return rb_utf8_encindex();
1538 symname_equal(VALUE sym
, const char *name
, size_t nlen
)
1542 if (rb_enc_get_index(sym
) != ENCINDEX_US_ASCII
) return 0;
1543 RSTRING_GETMEM(sym
, p
, l
);
1544 return name_equal(name
, nlen
, p
, l
);
1547 #define BUILD_ASSERT_POSITIVE(n) \
1548 /* make 0 negative to workaround the "zero size array" GCC extension, */ \
1549 ((sizeof(char [2*(ssize_t)(n)-1])+1)/2) /* assuming no overflow */
1550 #define symname_equal_lit(sym, sym_name) \
1551 symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name)))
1554 r_symlink(struct load_arg
*arg
)
1557 long num
= r_long(arg
);
1559 if (!st_lookup(arg
->symbols
, num
, &sym
)) {
1560 rb_raise(rb_eArgError
, "bad symbol");
1566 r_symreal(struct load_arg
*arg
, int ivar
)
1568 VALUE s
= r_bytes(arg
);
1571 st_index_t n
= arg
->symbols
->num_entries
;
1573 if (rb_enc_str_asciionly_p(s
)) rb_enc_associate_index(s
, ENCINDEX_US_ASCII
);
1574 st_insert(arg
->symbols
, (st_data_t
)n
, (st_data_t
)s
);
1576 long num
= r_long(arg
);
1578 sym
= r_symbol(arg
);
1579 idx
= sym2encidx(sym
, r_object(arg
));
1583 rb_enc_associate_index(s
, idx
);
1584 if (is_broken_string(s
)) {
1585 rb_raise(rb_eArgError
, "invalid byte sequence in %s: %+"PRIsVALUE
,
1586 rb_enc_name(rb_enc_from_index(idx
)), s
);
1594 r_symbol(struct load_arg
*arg
)
1599 switch ((type
= r_byte(arg
))) {
1601 rb_raise(rb_eArgError
, "dump format error for symbol(0x%x)", type
);
1606 return r_symreal(arg
, ivar
);
1609 rb_raise(rb_eArgError
, "dump format error (symlink with encoding)");
1611 return r_symlink(arg
);
1616 r_unique(struct load_arg
*arg
)
1618 return r_symbol(arg
);
1622 r_string(struct load_arg
*arg
)
1624 return r_bytes(arg
);
1628 r_entry0(VALUE v
, st_index_t num
, struct load_arg
*arg
)
1630 st_data_t real_obj
= (st_data_t
)v
;
1631 if (arg
->compat_tbl
) {
1632 /* real_obj is kept if not found */
1633 st_lookup(arg
->compat_tbl
, v
, &real_obj
);
1635 st_insert(arg
->data
, num
, real_obj
);
1636 st_insert(arg
->partial_objects
, (st_data_t
)real_obj
, Qtrue
);
1641 r_fixup_compat(VALUE v
, struct load_arg
*arg
)
1644 st_data_t key
= (st_data_t
)v
;
1645 if (arg
->compat_tbl
&& st_delete(arg
->compat_tbl
, &key
, &data
)) {
1646 VALUE real_obj
= (VALUE
)data
;
1647 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1648 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1649 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1650 compat
->loader(real_obj
, v
);
1658 r_post_proc(VALUE v
, struct load_arg
*arg
)
1661 v
= load_funcall(arg
, arg
->proc
, s_call
, 1, &v
);
1667 r_leave(VALUE v
, struct load_arg
*arg
, bool partial
)
1669 v
= r_fixup_compat(v
, arg
);
1672 st_data_t key
= (st_data_t
)v
;
1673 st_delete(arg
->partial_objects
, &key
, &data
);
1675 if (RB_TYPE_P(v
, T_MODULE
) || RB_TYPE_P(v
, T_CLASS
)) {
1678 else if (RB_TYPE_P(v
, T_STRING
)) {
1679 v
= rb_str_to_interned_str(v
);
1685 v
= r_post_proc(v
, arg
);
1691 copy_ivar_i(ID vid
, VALUE value
, st_data_t arg
)
1693 VALUE obj
= (VALUE
)arg
;
1695 if (!rb_ivar_defined(obj
, vid
))
1696 rb_ivar_set(obj
, vid
, value
);
1701 r_copy_ivar(VALUE v
, VALUE data
)
1703 rb_ivar_foreach(data
, copy_ivar_i
, (st_data_t
)v
);
1707 #define override_ivar_error(type, str) \
1708 rb_raise(rb_eTypeError, \
1709 "can't override instance variable of "type" '%"PRIsVALUE"'", \
1713 r_ivar(VALUE obj
, int *has_encoding
, struct load_arg
*arg
)
1719 if (RB_TYPE_P(obj
, T_MODULE
)) {
1720 override_ivar_error("module", rb_mod_name(obj
));
1722 else if (RB_TYPE_P(obj
, T_CLASS
)) {
1723 override_ivar_error("class", rb_class_name(obj
));
1726 VALUE sym
= r_symbol(arg
);
1727 VALUE val
= r_object(arg
);
1728 int idx
= sym2encidx(sym
, val
);
1730 if (rb_enc_capable(obj
)) {
1731 rb_enc_associate_index(obj
, idx
);
1734 rb_raise(rb_eArgError
, "%"PRIsVALUE
" is not enc_capable", obj
);
1736 if (has_encoding
) *has_encoding
= TRUE
;
1738 else if (symname_equal_lit(sym
, name_s_ruby2_keywords_flag
)) {
1739 if (RB_TYPE_P(obj
, T_HASH
)) {
1740 rb_hash_ruby2_keywords(obj
);
1743 rb_raise(rb_eArgError
, "ruby2_keywords flag is given but %"PRIsVALUE
" is not a Hash", obj
);
1747 rb_ivar_set(obj
, rb_intern_str(sym
), val
);
1749 } while (--len
> 0);
1754 path2class(VALUE path
)
1756 VALUE v
= rb_path_to_class(path
);
1758 if (!RB_TYPE_P(v
, T_CLASS
)) {
1759 rb_raise(rb_eArgError
, "%"PRIsVALUE
" does not refer to class", path
);
1764 #define path2module(path) must_be_module(rb_path_to_class(path), path)
1767 must_be_module(VALUE v
, VALUE path
)
1769 if (!RB_TYPE_P(v
, T_MODULE
)) {
1770 rb_raise(rb_eArgError
, "%"PRIsVALUE
" does not refer to module", path
);
1776 obj_alloc_by_klass(VALUE klass
, struct load_arg
*arg
, VALUE
*oldclass
)
1779 rb_alloc_func_t allocator
;
1781 allocator
= rb_get_alloc_func(klass
);
1782 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1783 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1784 VALUE real_obj
= rb_obj_alloc(klass
);
1785 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1786 if (oldclass
) *oldclass
= compat
->oldclass
;
1788 if (!arg
->compat_tbl
) {
1789 arg
->compat_tbl
= rb_init_identtable();
1791 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1795 return rb_obj_alloc(klass
);
1799 obj_alloc_by_path(VALUE path
, struct load_arg
*arg
)
1801 return obj_alloc_by_klass(path2class(path
), arg
, 0);
1805 append_extmod(VALUE obj
, VALUE extmod
)
1807 long i
= RARRAY_LEN(extmod
);
1809 VALUE m
= RARRAY_AREF(extmod
, --i
);
1810 rb_extend_object(obj
, m
);
1815 #define prohibit_ivar(type, str) do { \
1816 if (!ivp || !*ivp) break; \
1817 override_ivar_error(type, str); \
1820 static VALUE
r_object_for(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
, int type
);
1823 r_object0(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
)
1825 int type
= r_byte(arg
);
1826 return r_object_for(arg
, partial
, ivp
, extmod
, type
);
1830 r_object_for(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
, int type
)
1832 VALUE (*hash_new_with_size
)(st_index_t
) = rb_hash_new_with_size
;
1840 if (!st_lookup(arg
->data
, (st_data_t
)id
, &link
)) {
1841 rb_raise(rb_eArgError
, "dump format error (unlinked)");
1844 if (!st_lookup(arg
->partial_objects
, (st_data_t
)v
, &link
)) {
1845 v
= r_post_proc(v
, arg
);
1852 v
= r_object0(arg
, true, &ivar
, extmod
);
1853 if (ivar
) r_ivar(v
, NULL
, arg
);
1854 v
= r_leave(v
, arg
, partial
);
1860 VALUE path
= r_unique(arg
);
1861 VALUE m
= rb_path_to_class(path
);
1862 if (NIL_P(extmod
)) extmod
= rb_ary_hidden_new(0);
1864 if (RB_TYPE_P(m
, T_CLASS
)) { /* prepended */
1867 v
= r_object0(arg
, true, 0, Qnil
);
1869 if (c
!= m
|| FL_TEST(c
, FL_SINGLETON
)) {
1870 rb_raise(rb_eArgError
,
1871 "prepended class %"PRIsVALUE
" differs from class %"PRIsVALUE
,
1872 path
, rb_class_name(c
));
1874 c
= rb_singleton_class(v
);
1875 while (RARRAY_LEN(extmod
) > 0) {
1876 m
= rb_ary_pop(extmod
);
1877 rb_prepend_module(c
, m
);
1881 must_be_module(m
, path
);
1882 rb_ary_push(extmod
, m
);
1884 v
= r_object0(arg
, true, 0, extmod
);
1885 while (RARRAY_LEN(extmod
) > 0) {
1886 m
= rb_ary_pop(extmod
);
1887 rb_extend_object(v
, m
);
1890 v
= r_leave(v
, arg
, partial
);
1896 VALUE c
= path2class(r_unique(arg
));
1898 if (FL_TEST(c
, FL_SINGLETON
)) {
1899 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1902 if ((c
== rb_cHash
) &&
1903 /* Hack for compare_by_identity */
1904 (type
== TYPE_HASH
|| type
== TYPE_HASH_DEF
)) {
1905 hash_new_with_size
= rb_ident_hash_new_with_size
;
1908 v
= r_object_for(arg
, partial
, 0, extmod
, type
);
1909 if (RB_SPECIAL_CONST_P(v
) || RB_TYPE_P(v
, T_OBJECT
) || RB_TYPE_P(v
, T_CLASS
)) {
1912 if (RB_TYPE_P(v
, T_MODULE
) || !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
1913 VALUE tmp
= rb_obj_alloc(c
);
1915 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1917 RBASIC_SET_CLASS(v
, c
);
1922 rb_raise(rb_eArgError
, "dump format error (user class)");
1926 v
= r_leave(v
, arg
, false);
1931 v
= r_leave(v
, arg
, false);
1936 v
= r_leave(v
, arg
, false);
1941 long i
= r_long(arg
);
1944 v
= r_leave(v
, arg
, false);
1950 VALUE str
= r_bytes(arg
);
1951 const char *ptr
= RSTRING_PTR(str
);
1953 if (strcmp(ptr
, "nan") == 0) {
1956 else if (strcmp(ptr
, "inf") == 0) {
1959 else if (strcmp(ptr
, "-inf") == 0) {
1964 d
= strtod(ptr
, &e
);
1965 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1968 v
= r_entry(v
, arg
);
1969 v
= r_leave(v
, arg
, false);
1982 if (SIZEOF_VALUE
>= 8 && len
<= 4) {
1983 // Representable within uintptr, likely FIXNUM
1985 for (int i
= 0; i
< len
; i
++) {
1986 num
|= (VALUE
)r_byte(arg
) << (i
* 16);
1987 num
|= (VALUE
)r_byte(arg
) << (i
* 16 + 8);
1989 #if SIZEOF_VALUE == SIZEOF_LONG
1995 v
= rb_int_uminus(v
);
1999 data
= r_bytes0(len
* 2, arg
);
2000 v
= rb_integer_unpack(RSTRING_PTR(data
), len
, 2, 0,
2001 INTEGER_PACK_LITTLE_ENDIAN
| (sign
== '-' ? INTEGER_PACK_NEGATIVE
: 0));
2002 rb_str_resize(data
, 0L);
2004 v
= r_entry(v
, arg
);
2005 v
= r_leave(v
, arg
, false);
2010 v
= r_entry(r_string(arg
), arg
);
2011 v
= r_leave(v
, arg
, partial
);
2016 VALUE str
= r_bytes(arg
);
2017 int options
= r_byte(arg
);
2018 int has_encoding
= FALSE
;
2019 st_index_t idx
= r_prepare(arg
);
2022 r_ivar(str
, &has_encoding
, arg
);
2025 if (!has_encoding
) {
2026 /* 1.8 compatibility; remove escapes undefined in 1.8 */
2027 char *ptr
= RSTRING_PTR(str
), *dst
= ptr
, *src
= ptr
;
2028 long len
= RSTRING_LEN(str
);
2030 for (; len
-- > 0; *dst
++ = *src
++) {
2032 case '\\': bs
++; break;
2033 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2034 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
2035 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
2036 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
2037 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
2040 default: bs
= 0; break;
2043 rb_str_set_len(str
, dst
- ptr
);
2045 VALUE regexp
= rb_reg_new_str(str
, options
);
2046 r_copy_ivar(regexp
, str
);
2048 v
= r_entry0(regexp
, idx
, arg
);
2049 v
= r_leave(v
, arg
, partial
);
2055 long len
= r_long(arg
);
2057 v
= rb_ary_new2(len
);
2058 v
= r_entry(v
, arg
);
2059 arg
->readable
+= len
- 1;
2061 rb_ary_push(v
, r_object(arg
));
2064 v
= r_leave(v
, arg
, partial
);
2073 long len
= r_long(arg
);
2075 v
= hash_new_with_size(len
);
2076 v
= r_entry(v
, arg
);
2077 arg
->readable
+= (len
- 1) * 2;
2079 VALUE key
= r_object(arg
);
2080 VALUE value
= r_object(arg
);
2081 rb_hash_aset(v
, key
, value
);
2085 if (type
== TYPE_HASH_DEF
) {
2086 RHASH_SET_IFNONE(v
, r_object(arg
));
2088 v
= r_leave(v
, arg
, partial
);
2097 st_index_t idx
= r_prepare(arg
);
2098 VALUE klass
= path2class(r_unique(arg
));
2099 long len
= r_long(arg
);
2101 v
= rb_obj_alloc(klass
);
2102 if (!RB_TYPE_P(v
, T_STRUCT
)) {
2103 rb_raise(rb_eTypeError
, "class %"PRIsVALUE
" not a struct", rb_class_name(klass
));
2105 mem
= rb_struct_s_members(klass
);
2106 if (RARRAY_LEN(mem
) != len
) {
2107 rb_raise(rb_eTypeError
, "struct %"PRIsVALUE
" not compatible (struct size differs)",
2108 rb_class_name(klass
));
2111 arg
->readable
+= (len
- 1) * 2;
2112 v
= r_entry0(v
, idx
, arg
);
2113 values
= rb_ary_new2(len
);
2115 VALUE keywords
= Qfalse
;
2116 if (RTEST(rb_struct_s_keyword_init(klass
))) {
2117 keywords
= rb_hash_new();
2118 rb_ary_push(values
, keywords
);
2121 for (i
=0; i
<len
; i
++) {
2122 VALUE n
= rb_sym2str(RARRAY_AREF(mem
, i
));
2123 slot
= r_symbol(arg
);
2125 if (!rb_str_equal(n
, slot
)) {
2126 rb_raise(rb_eTypeError
, "struct %"PRIsVALUE
" not compatible (:%"PRIsVALUE
" for :%"PRIsVALUE
")",
2127 rb_class_name(klass
),
2131 rb_hash_aset(keywords
, RARRAY_AREF(mem
, i
), r_object(arg
));
2134 rb_ary_push(values
, r_object(arg
));
2139 rb_struct_initialize(v
, values
);
2140 v
= r_leave(v
, arg
, partial
);
2147 VALUE name
= r_unique(arg
);
2148 VALUE klass
= path2class(name
);
2152 if (!rb_obj_respond_to(klass
, s_load
, TRUE
)) {
2153 rb_raise(rb_eTypeError
, "class %"PRIsVALUE
" needs to have method '_load'",
2156 data
= r_string(arg
);
2158 r_ivar(data
, NULL
, arg
);
2161 v
= load_funcall(arg
, klass
, s_load
, 1, &data
);
2162 v
= r_entry(v
, arg
);
2163 if (st_lookup(compat_allocator_tbl
, (st_data_t
)rb_get_alloc_func(klass
), &d
)) {
2164 marshal_compat_t
*compat
= (marshal_compat_t
*)d
;
2165 v
= compat
->loader(klass
, v
);
2171 v
= r_post_proc(v
, arg
);
2176 case TYPE_USRMARSHAL
:
2178 VALUE name
= r_unique(arg
);
2179 VALUE klass
= path2class(name
);
2183 v
= obj_alloc_by_klass(klass
, arg
, &oldclass
);
2184 if (!NIL_P(extmod
)) {
2185 /* for the case marshal_load is overridden */
2186 append_extmod(v
, extmod
);
2188 if (!rb_obj_respond_to(v
, s_mload
, TRUE
)) {
2189 rb_raise(rb_eTypeError
, "instance of %"PRIsVALUE
" needs to have method 'marshal_load'",
2192 v
= r_entry(v
, arg
);
2193 data
= r_object(arg
);
2194 load_funcall(arg
, v
, s_mload
, 1, &data
);
2195 v
= r_fixup_compat(v
, arg
);
2196 v
= r_copy_ivar(v
, data
);
2200 v
= r_post_proc(v
, arg
);
2201 if (!NIL_P(extmod
)) {
2202 if (oldclass
) append_extmod(v
, extmod
);
2203 rb_ary_clear(extmod
);
2210 st_index_t idx
= r_prepare(arg
);
2211 v
= obj_alloc_by_path(r_unique(arg
), arg
);
2212 if (!RB_TYPE_P(v
, T_OBJECT
)) {
2213 rb_raise(rb_eArgError
, "dump format error");
2215 v
= r_entry0(v
, idx
, arg
);
2216 r_ivar(v
, NULL
, arg
);
2217 v
= r_leave(v
, arg
, partial
);
2223 VALUE name
= r_unique(arg
);
2224 VALUE klass
= path2class(name
);
2228 v
= obj_alloc_by_klass(klass
, arg
, &oldclass
);
2229 if (!RB_TYPE_P(v
, T_DATA
)) {
2230 rb_raise(rb_eArgError
, "dump format error");
2232 v
= r_entry(v
, arg
);
2233 if (!rb_obj_respond_to(v
, s_load_data
, TRUE
)) {
2234 rb_raise(rb_eTypeError
,
2235 "class %"PRIsVALUE
" needs to have instance method '_load_data'",
2238 r
= r_object0(arg
, partial
, 0, extmod
);
2239 load_funcall(arg
, v
, s_load_data
, 1, &r
);
2240 v
= r_leave(v
, arg
, partial
);
2244 case TYPE_MODULE_OLD
:
2246 VALUE str
= r_bytes(arg
);
2248 v
= rb_path_to_class(str
);
2249 prohibit_ivar("class/module", str
);
2250 v
= r_entry(v
, arg
);
2251 v
= r_leave(v
, arg
, partial
);
2257 VALUE str
= r_bytes(arg
);
2259 v
= path2class(str
);
2260 prohibit_ivar("class", str
);
2261 v
= r_entry(v
, arg
);
2262 v
= r_leave(v
, arg
, partial
);
2268 VALUE str
= r_bytes(arg
);
2270 v
= path2module(str
);
2271 prohibit_ivar("module", str
);
2272 v
= r_entry(v
, arg
);
2273 v
= r_leave(v
, arg
, partial
);
2279 v
= r_symreal(arg
, *ivp
);
2283 v
= r_symreal(arg
, 0);
2285 v
= rb_str_intern(v
);
2286 v
= r_leave(v
, arg
, partial
);
2290 v
= rb_str_intern(r_symlink(arg
));
2294 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
2299 rb_raise(rb_eArgError
, "dump format error (bad link)");
2306 r_object(struct load_arg
*arg
)
2308 return r_object0(arg
, false, 0, Qnil
);
2312 clear_load_arg(struct load_arg
*arg
)
2319 if (!arg
->symbols
) return;
2320 st_free_table(arg
->symbols
);
2322 st_free_table(arg
->data
);
2324 st_free_table(arg
->partial_objects
);
2325 arg
->partial_objects
= 0;
2326 if (arg
->compat_tbl
) {
2327 st_free_table(arg
->compat_tbl
);
2328 arg
->compat_tbl
= 0;
2333 rb_marshal_load_with_proc(VALUE port
, VALUE proc
, bool freeze
)
2337 VALUE wrapper
; /* used to avoid memory leak in case of exception */
2338 struct load_arg
*arg
;
2340 v
= rb_check_string_type(port
);
2344 else if (rb_respond_to(port
, s_getbyte
) && rb_respond_to(port
, s_read
)) {
2345 rb_check_funcall(port
, s_binmode
, 0, 0);
2350 wrapper
= TypedData_Make_Struct(0, struct load_arg
, &load_arg_data
, arg
);
2353 arg
->symbols
= st_init_numtable();
2354 arg
->data
= rb_init_identtable();
2355 arg
->partial_objects
= rb_init_identtable();
2356 arg
->compat_tbl
= 0;
2359 arg
->freeze
= freeze
;
2362 arg
->buf
= xmalloc(BUFSIZ
);
2366 major
= r_byte(arg
);
2367 minor
= r_byte(arg
);
2368 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
2369 clear_load_arg(arg
);
2370 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
2371 \tformat version %d.%d required; %d.%d given",
2372 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
2374 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
2375 rb_warn("incompatible marshal file format (can be read)\n\
2376 \tformat version %d.%d required; %d.%d given",
2377 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
2380 if (!NIL_P(proc
)) arg
->proc
= proc
;
2382 clear_load_arg(arg
);
2383 RB_GC_GUARD(wrapper
);
2389 marshal_load(rb_execution_context_t
*ec
, VALUE mod
, VALUE source
, VALUE proc
, VALUE freeze
)
2391 return rb_marshal_load_with_proc(source
, proc
, RTEST(freeze
));
2394 #include "marshal.rbinc"
2397 * The marshaling library converts collections of Ruby objects into a
2398 * byte stream, allowing them to be stored outside the currently
2399 * active script. This data may subsequently be read and the original
2400 * objects reconstituted.
2402 * Marshaled data has major and minor version numbers stored along
2403 * with the object information. In normal use, marshaling can only
2404 * load data written with the same major version number and an equal
2405 * or lower minor version number. If Ruby's ``verbose'' flag is set
2406 * (normally using -d, -v, -w, or --verbose), a warning is issued when
2407 * the minor number differs. Marshal versioning is independent of
2408 * Ruby's version numbers. You can extract the version by reading the
2409 * first two bytes of marshaled data.
2411 * str = Marshal.dump("thing")
2412 * RUBY_VERSION #=> "1.9.0"
2416 * Some objects cannot be dumped: if the objects to be dumped include
2417 * bindings, procedure or method objects, instances of class IO, or
2418 * singleton objects, a TypeError will be raised.
2420 * If your class has special serialization needs (for example, if you
2421 * want to serialize in some specific format), or if it contains
2422 * objects that would otherwise not be serializable, you can implement
2423 * your own serialization strategy.
2425 * There are two ways of doing this: your object can define either
2426 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2427 * precedence over _dump if both are defined. marshal_dump may result in
2428 * smaller Marshal strings.
2430 * == Security considerations
2432 * By design, Marshal.load can deserialize almost any class loaded into the
2433 * Ruby process. In many cases this can lead to remote code execution if the
2434 * Marshal data is loaded from an untrusted source.
2436 * As a result, Marshal.load is not suitable as a general purpose serialization
2437 * format and you should never unmarshal user supplied input or other untrusted
2440 * If you need to deserialize untrusted data, use JSON or another serialization
2441 * format that is only able to load simple, 'primitive' types such as String,
2442 * Array, Hash, etc. Never allow user input to specify arbitrary types to
2445 * == marshal_dump and marshal_load
2447 * When dumping an object the method marshal_dump will be called.
2448 * marshal_dump must return a result containing the information necessary for
2449 * marshal_load to reconstitute the object. The result can be any object.
2451 * When loading an object dumped using marshal_dump the object is first
2452 * allocated then marshal_load is called with the result from marshal_dump.
2453 * marshal_load must recreate the object from the information in the result.
2458 * def initialize name, version, data
2460 * @version = version
2468 * def marshal_load array
2469 * @name, @version = array
2473 * == _dump and _load
2475 * Use _dump and _load when you need to allocate the object you're restoring
2478 * When dumping an object the instance method _dump is called with an Integer
2479 * which indicates the maximum depth of objects to dump (a value of -1 implies
2480 * that you should disable depth checking). _dump must return a String
2481 * containing the information necessary to reconstitute the object.
2483 * The class method _load should take a String and use it to return an object
2484 * of the same class.
2489 * def initialize name, version, data
2491 * @version = version
2496 * [@name, @version].join ':'
2499 * def self._load args
2500 * new(*args.split(':'))
2504 * Since Marshal.dump outputs a string you can have _dump return a Marshal
2505 * string which is Marshal.loaded in _load for complex objects.
2510 VALUE rb_mMarshal
= rb_define_module("Marshal");
2511 #define set_id(sym) sym = rb_intern_const(name_##sym)
2516 set_id(s_dump_data
);
2517 set_id(s_load_data
);
2524 set_id(s_encoding_short
);
2525 set_id(s_ruby2_keywords_flag
);
2527 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
2530 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
2532 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
2536 marshal_compat_table_mark_i(st_data_t key
, st_data_t value
, st_data_t _
)
2538 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
2539 rb_gc_mark_movable(p
->newclass
);
2540 rb_gc_mark_movable(p
->oldclass
);
2545 marshal_compat_table_mark(void *tbl
)
2548 st_foreach(tbl
, marshal_compat_table_mark_i
, 0);
2552 marshal_compat_table_free_i(st_data_t key
, st_data_t value
, st_data_t _
)
2554 xfree((marshal_compat_t
*)value
);
2559 marshal_compat_table_free(void *data
)
2561 st_foreach(data
, marshal_compat_table_free_i
, 0);
2562 st_free_table(data
);
2566 marshal_compat_table_memsize(const void *data
)
2568 return st_memsize(data
) + sizeof(marshal_compat_t
) * st_table_size(data
);
2572 marshal_compat_table_compact_i(st_data_t key
, st_data_t value
, st_data_t _
)
2574 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
2575 p
->newclass
= rb_gc_location(p
->newclass
);
2576 p
->oldclass
= rb_gc_location(p
->oldclass
);
2581 marshal_compat_table_compact(void *tbl
)
2584 st_foreach(tbl
, marshal_compat_table_compact_i
, 0);
2587 static const rb_data_type_t marshal_compat_type
= {
2588 .wrap_struct_name
= "marshal_compat_table",
2590 .dmark
= marshal_compat_table_mark
,
2591 .dfree
= marshal_compat_table_free
,
2592 .dsize
= marshal_compat_table_memsize
,
2593 .dcompact
= marshal_compat_table_compact
,
2595 .flags
= RUBY_TYPED_WB_PROTECTED
| RUBY_TYPED_FREE_IMMEDIATELY
,
2599 compat_allocator_table(void)
2601 if (compat_allocator_tbl
) return compat_allocator_tbl
;
2602 compat_allocator_tbl
= st_init_numtable();
2603 compat_allocator_tbl_wrapper
=
2604 TypedData_Wrap_Struct(0, &marshal_compat_type
, compat_allocator_tbl
);
2605 rb_vm_register_global_object(compat_allocator_tbl_wrapper
);
2606 return compat_allocator_tbl
;
2610 rb_marshal_dump(VALUE obj
, VALUE port
)
2612 return rb_marshal_dump_limited(obj
, port
, -1);
2616 rb_marshal_load(VALUE port
)
2618 return rb_marshal_load_with_proc(port
, Qnil
, false);