[ruby/strscan] jruby: Check if len++ walked off the end
[ruby.git] / marshal.c
blobe19bd5f70827cff896a65fbb1f04582652baeb36
1 /**********************************************************************
3 marshal.c -
5 $Author$
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
14 #include <math.h>
15 #ifdef HAVE_FLOAT_H
16 #include <float.h>
17 #endif
18 #ifdef HAVE_IEEEFP_H
19 #include <ieeefp.h>
20 #endif
22 #include "encindex.h"
23 #include "id_table.h"
24 #include "internal.h"
25 #include "internal/array.h"
26 #include "internal/bignum.h"
27 #include "internal/class.h"
28 #include "internal/encoding.h"
29 #include "internal/error.h"
30 #include "internal/hash.h"
31 #include "internal/numeric.h"
32 #include "internal/object.h"
33 #include "internal/struct.h"
34 #include "internal/symbol.h"
35 #include "internal/util.h"
36 #include "internal/vm.h"
37 #include "ruby/io.h"
38 #include "ruby/ruby.h"
39 #include "ruby/st.h"
40 #include "ruby/util.h"
41 #include "builtin.h"
42 #include "shape.h"
43 #include "ruby/internal/attr/nonstring.h"
/* Number of bits in one "short" unit of a dumped bignum (two chars wide). */
45 #define BITSPERSHORT (2*CHAR_BIT)
/* Mask that keeps only the low BITSPERSHORT bits of a value. */
46 #define SHORTMASK ((1<<BITSPERSHORT)-1)
/* Shifts x right by one short unit using RSHIFT. */
47 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
49 #if SIZEOF_SHORT == SIZEOF_BDIGIT
50 #define SHORTLEN(x) (x)
51 #else
52 static size_t
53 shortlen(size_t len, BDIGIT *ds)
55 BDIGIT num;
56 int offset = 0;
58 num = ds[len-1];
59 while (num) {
60 num = SHORTDN(num);
61 offset++;
63 return (len - 1)*SIZEOF_BDIGIT/2 + offset;
65 #define SHORTLEN(x) shortlen((x),d)
66 #endif
/* Format version bytes; rb_marshal_dump_limited() writes them first, major then minor. */
68 #define MARSHAL_MAJOR 4
69 #define MARSHAL_MINOR 8
/* One-byte type tags emitted with w_byte() in front of each dumped value. */
/* Immediate values. */
71 #define TYPE_NIL '0'
72 #define TYPE_TRUE 'T'
73 #define TYPE_FALSE 'F'
74 #define TYPE_FIXNUM 'i'
/* Class decorations and object kinds. */
76 #define TYPE_EXTENDED 'e'
77 #define TYPE_UCLASS 'C'
78 #define TYPE_OBJECT 'o'
79 #define TYPE_DATA 'd'
80 #define TYPE_USERDEF 'u'
81 #define TYPE_USRMARSHAL 'U'
82 #define TYPE_FLOAT 'f'
83 #define TYPE_BIGNUM 'l'
84 #define TYPE_STRING '"'
85 #define TYPE_REGEXP '/'
86 #define TYPE_ARRAY '['
87 #define TYPE_HASH '{'
88 #define TYPE_HASH_DEF '}'
89 #define TYPE_STRUCT 'S'
90 #define TYPE_MODULE_OLD 'M'
91 #define TYPE_CLASS 'c'
92 #define TYPE_MODULE 'm'
/* A symbol written in full, and a back-reference to an already written symbol. */
94 #define TYPE_SYMBOL ':'
95 #define TYPE_SYMLINK ';'
/* Prefix for a value followed by instance variables, and a back-reference to an already written object. */
97 #define TYPE_IVAR 'I'
98 #define TYPE_LINK '@'
/*
 * IDs of the method and instance-variable names used while dumping and
 * loading.  NOTE(review): they are presumably interned during
 * initialization; that code is not part of this excerpt.
 */
100 static ID s_dump, s_load, s_mdump, s_mload;
101 static ID s_dump_data, s_load_data, s_alloc, s_call;
102 static ID s_getbyte, s_read, s_write, s_binmode;
103 static ID s_encoding_short, s_ruby2_keywords_flag;
/* The long encoding ivar ID is not cached; it is fetched through rb_id_encoding(). */
104 #define s_encoding_long rb_id_encoding()
/*
 * C-string spelling of each ID above.  Macros such as dump_funcall() paste
 * `name_##sym` to obtain the matching name for error messages.
 */
106 #define name_s_dump "_dump"
107 #define name_s_load "_load"
108 #define name_s_mdump "marshal_dump"
109 #define name_s_mload "marshal_load"
110 #define name_s_dump_data "_dump_data"
111 #define name_s_load_data "_load_data"
112 #define name_s_alloc "_alloc"
113 #define name_s_call "call"
114 #define name_s_getbyte "getbyte"
115 #define name_s_read "read"
116 #define name_s_write "write"
117 #define name_s_binmode "binmode"
118 #define name_s_encoding_short "E"
119 #define name_s_encoding_long "encoding"
120 #define name_s_ruby2_keywords_flag "K"
122 typedef struct {
123 VALUE newclass;
124 VALUE oldclass;
125 VALUE (*dumper)(VALUE);
126 VALUE (*loader)(VALUE, VALUE);
127 } marshal_compat_t;
/* Table of marshal_compat_t entries; w_object() looks entries up by allocator function. */
129 static st_table *compat_allocator_tbl;
/* Object passed to RB_OBJ_WRITE as the owner of the classes stored in compat entries. */
130 static VALUE compat_allocator_tbl_wrapper;
/* Forward declarations of the dump/load entry points defined later in the file. */
131 static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit);
132 static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze);
/* NOTE(review): presumably creates compat_allocator_tbl on first use and returns it; definition not visible here. */
134 static st_table *compat_allocator_table(void);
136 void
137 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
139 marshal_compat_t *compat;
140 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
142 if (!allocator) {
143 rb_raise(rb_eTypeError, "no allocator");
146 compat_allocator_table();
147 compat = ALLOC(marshal_compat_t);
148 RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->newclass, newclass);
149 RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->oldclass, oldclass);
150 compat->dumper = dumper;
151 compat->loader = loader;
153 st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
156 struct dump_arg {
157 VALUE str, dest;
158 st_table *symbols;
159 st_table *data;
160 st_table *compat_tbl;
161 st_table *encodings;
162 st_table *userdefs;
163 st_index_t num_entries;
166 struct dump_call_arg {
167 VALUE obj;
168 struct dump_arg *arg;
169 int limit;
172 static VALUE
173 check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name)
175 if (!arg->symbols) {
176 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
177 name);
179 return ret;
182 static VALUE
183 check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv,
184 struct dump_arg *arg, const char *name)
186 VALUE ret = rb_funcallv(obj, sym, argc, argv);
187 VALUE klass = CLASS_OF(obj);
188 if (CLASS_OF(ret) == klass) {
189 rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance",
190 klass, name);
192 return check_dump_arg(ret, arg, name);
/* Calls obj.sym(*argv) through check_userdump_arg(); `name_##sym` supplies the method name for error messages. */
195 #define dump_funcall(arg, obj, sym, argc, argv) \
196 check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
/* Like dump_funcall but uses rb_check_funcall() and only applies the reentrancy check. */
197 #define dump_check_funcall(arg, obj, sym, argc, argv) \
198 check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
200 static void clear_dump_arg(struct dump_arg *arg);
202 static void
203 mark_dump_arg(void *ptr)
205 struct dump_arg *p = ptr;
206 if (!p->symbols)
207 return;
208 rb_mark_set(p->symbols);
209 rb_mark_set(p->data);
210 rb_mark_hash(p->compat_tbl);
211 rb_mark_set(p->userdefs);
212 rb_gc_mark(p->str);
/* Free callback for dump_arg_data: releases the tables via clear_dump_arg(). */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *dump = ptr;

    clear_dump_arg(dump);
}
221 static size_t
222 memsize_dump_arg(const void *ptr)
224 const struct dump_arg *p = (struct dump_arg *)ptr;
225 size_t memsize = 0;
226 if (p->symbols) memsize += rb_st_memsize(p->symbols);
227 if (p->data) memsize += rb_st_memsize(p->data);
228 if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl);
229 if (p->userdefs) memsize += rb_st_memsize(p->userdefs);
230 if (p->encodings) memsize += rb_st_memsize(p->encodings);
231 return memsize;
234 static const rb_data_type_t dump_arg_data = {
235 "dump_arg",
236 {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
237 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
240 static VALUE
241 must_not_be_anonymous(const char *type, VALUE path)
243 char *n = RSTRING_PTR(path);
245 if (!rb_enc_asciicompat(rb_enc_get(path))) {
246 /* cannot occur? */
247 rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE,
248 type, path);
250 if (n[0] == '#') {
251 rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE,
252 type, path);
254 return path;
257 static VALUE
258 class2path(VALUE klass)
260 VALUE path = rb_class_path(klass);
262 must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
263 if (rb_path_to_class(path) != rb_class_real(klass)) {
264 rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path);
266 return path;
269 int ruby_marshal_write_long(long x, char *buf);
270 static void w_long(long, struct dump_arg*);
271 static int w_encoding(VALUE encname, struct dump_call_arg *arg);
272 static VALUE encoding_name(VALUE obj, struct dump_arg *arg);
274 static void
275 w_nbyte(const char *s, long n, struct dump_arg *arg)
277 VALUE buf = arg->str;
278 rb_str_buf_cat(buf, s, n);
279 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
280 rb_io_write(arg->dest, buf);
281 rb_str_resize(buf, 0);
/* Writes the single byte `c`. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char byte = c;

    w_nbyte(&byte, 1, arg);
}
/* Writes a length-prefixed byte sequence: the length `n` via w_long(), then the bytes. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    const long length = n;

    w_long(length, arg);
    w_nbyte(s, length, arg);
}
/* Writes the NUL-terminated C string s with w_bytes(); note that s is evaluated twice. */
298 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
/* Writes the low 16 bits of `x` as two bytes, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low = (char)((x >> 0) & 0xff);
    const char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
307 static void
308 w_long(long x, struct dump_arg *arg)
310 char buf[sizeof(long)+1];
311 int i = ruby_marshal_write_long(x, buf);
312 if (i < 0) {
313 rb_raise(rb_eTypeError, "long too big to dump");
315 w_nbyte(buf, i, arg);
/*
 * Encodes `x` into `buf` and returns the number of bytes written, or -1
 * when long is wider than 4 bytes and `x` does not fit in 32 bits.
 *
 * Encoding:
 *   - 0 is the single byte 0;
 *   - 1..122 is the single byte x+5, and -123..-1 the single byte x-5;
 *   - anything else is a count byte (+n for non-negative, -n for negative
 *     values) followed by the n low-order bytes, least significant first.
 *
 * `buf` must have room for sizeof(long)+1 bytes.
 */
int
ruby_marshal_write_long(long x, char *buf)
{
    int n = 1;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        /* big long does not fit in 4 bytes */
        return -1;
    }
#endif

    if (x == 0) {
        buf[0] = 0;
        return 1;
    }
    if (0 < x && x < 123) {
        buf[0] = (char)(x + 5);
        return 1;
    }
    if (-124 < x && x < 0) {
        buf[0] = (char)((x - 5)&0xff);
        return 1;
    }

    while (n < (int)sizeof(long)+1) {
        buf[n] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            /* only zero bits remain: positive count */
            buf[0] = n;
            break;
        }
        if (x == -1) {
            /* only sign bits remain: negative count */
            buf[0] = -n;
            break;
        }
        n++;
    }
    return n+1;
}
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16)	/* from IEEE754 double precision */

/* Number of mantissa bits consumed per iteration of the loop below. */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines `d` with the extra mantissa bytes found in `buf` (`len` bytes).
 *
 * Nothing is done unless `len` is at least 2 and the first byte is zero
 * (the binary mantissa mark).  Otherwise `d` is truncated to DECIMAL_MANT
 * significant bits and the remaining bytes are added below that point,
 * MANT_BITS/8 bytes at a time; the sign and exponent of `d` are restored
 * at the end.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exponent, negative, scale = 0;
    unsigned long chunk;

    if (len == 0) return d;
    if (--len <= 0 || *buf++ != 0) return d;	/* no binary mantissa mark */

    negative = d < 0;
    modf(ldexp(frexp(fabs(d), &exponent), DECIMAL_MANT), &d);
    do {
        chunk = 0;
        /* gather up to MANT_BITS/8 bytes, most significant byte first */
        switch (len) {
          default: chunk = *buf++ & 0xff; /* fall through */
#if MANT_BITS > 24
          case 3: chunk = (chunk << 8) | (*buf++ & 0xff); /* fall through */
#endif
#if MANT_BITS > 16
          case 2: chunk = (chunk << 8) | (*buf++ & 0xff); /* fall through */
#endif
#if MANT_BITS > 8
          case 1: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
        }
        scale -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)chunk, scale);
    } while ((len -= MANT_BITS / 8) > 0);
    d = ldexp(d, exponent - DECIMAL_MANT);
    if (negative) d = -d;
    return d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
/* Digit count used to size the text buffer in w_float(): DBL_DIG+2 when DBL_DIG is available, otherwise 17. */
405 #ifdef DBL_DIG
406 #define FLOAT_DIG (DBL_DIG+2)
407 #else
408 #define FLOAT_DIG 17
409 #endif
411 static void
412 w_float(double d, struct dump_arg *arg)
414 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
416 if (isinf(d)) {
417 if (d < 0) w_cstr("-inf", arg);
418 else w_cstr("inf", arg);
420 else if (isnan(d)) {
421 w_cstr("nan", arg);
423 else if (d == 0.0) {
424 if (signbit(d)) w_cstr("-0", arg);
425 else w_cstr("0", arg);
427 else {
428 int decpt, sign, digs, len = 0;
429 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
430 if (sign) buf[len++] = '-';
431 digs = (int)(e - p);
432 if (decpt < -3 || decpt > digs) {
433 buf[len++] = p[0];
434 if (--digs > 0) buf[len++] = '.';
435 memcpy(buf + len, p + 1, digs);
436 len += digs;
437 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
439 else if (decpt > 0) {
440 memcpy(buf + len, p, decpt);
441 len += decpt;
442 if ((digs -= decpt) > 0) {
443 buf[len++] = '.';
444 memcpy(buf + len, p + decpt, digs);
445 len += digs;
448 else {
449 buf[len++] = '0';
450 buf[len++] = '.';
451 if (decpt) {
452 memset(buf + len, '0', -decpt);
453 len -= decpt;
455 memcpy(buf + len, p, digs);
456 len += digs;
458 free(p);
459 w_bytes(buf, len, arg);
463 static void
464 w_symbol(VALUE sym, struct dump_arg *arg)
466 st_data_t num;
467 VALUE encname;
469 if (st_lookup(arg->symbols, sym, &num)) {
470 w_byte(TYPE_SYMLINK, arg);
471 w_long((long)num, arg);
473 else {
474 const VALUE orig_sym = sym;
475 sym = rb_sym2str(sym);
476 if (!sym) {
477 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
479 encname = encoding_name(sym, arg);
480 if (NIL_P(encname) ||
481 is_ascii_string(sym)) {
482 encname = Qnil;
484 else {
485 w_byte(TYPE_IVAR, arg);
487 w_byte(TYPE_SYMBOL, arg);
488 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
489 st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
490 if (!NIL_P(encname)) {
491 struct dump_call_arg c_arg;
492 c_arg.limit = 1;
493 c_arg.arg = arg;
494 w_long(1L, arg);
495 w_encoding(encname, &c_arg);
500 static void
501 w_unique(VALUE s, struct dump_arg *arg)
503 must_not_be_anonymous("class", s);
504 w_symbol(rb_str_intern(s), arg);
507 static void w_object(VALUE,struct dump_arg*,int);
509 static int
510 hash_each(VALUE key, VALUE value, VALUE v)
512 struct dump_call_arg *arg = (void *)v;
513 w_object(key, arg->arg, arg->limit);
514 w_object(value, arg->arg, arg->limit);
515 return ST_CONTINUE;
/* True when klass has at least one method table entry or at least one instance variable; klass is evaluated twice. */
518 #define SINGLETON_DUMP_UNABLE_P(klass) \
519 (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
520 rb_ivar_count(klass) > 0)
522 static void
523 w_extended(VALUE klass, struct dump_arg *arg, int check)
525 if (check && RCLASS_SINGLETON_P(klass)) {
526 VALUE origin = RCLASS_ORIGIN(klass);
527 if (SINGLETON_DUMP_UNABLE_P(klass) ||
528 (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
529 rb_raise(rb_eTypeError, "singleton can't be dumped");
531 klass = RCLASS_SUPER(klass);
533 while (BUILTIN_TYPE(klass) == T_ICLASS) {
534 if (!FL_TEST(klass, RICLASS_IS_ORIGIN) ||
535 BUILTIN_TYPE(RBASIC(klass)->klass) != T_MODULE) {
536 VALUE path = rb_class_name(RBASIC(klass)->klass);
537 w_byte(TYPE_EXTENDED, arg);
538 w_unique(path, arg);
540 klass = RCLASS_SUPER(klass);
544 static void
545 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
547 VALUE path;
548 st_data_t real_obj;
549 VALUE klass;
551 if (arg->compat_tbl &&
552 st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
553 obj = (VALUE)real_obj;
555 klass = CLASS_OF(obj);
556 w_extended(klass, arg, check);
557 w_byte(type, arg);
558 path = class2path(rb_class_real(klass));
559 w_unique(path, arg);
562 static void
563 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
565 VALUE klass = CLASS_OF(obj);
567 w_extended(klass, arg, TRUE);
568 klass = rb_class_real(klass);
569 if (klass != super) {
570 w_byte(TYPE_UCLASS, arg);
571 w_unique(class2path(klass), arg);
575 static bool
576 rb_hash_ruby2_keywords_p(VALUE obj)
578 return (RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS) != 0;
581 static void
582 rb_hash_ruby2_keywords(VALUE obj)
584 RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
588 * if instance variable name `id` is a special name to be skipped,
589 * returns the name of it. otherwise it cannot be dumped (unnamed),
590 * returns `name` as-is. returns NULL for ID that can be dumped.
592 static inline const char *
593 skipping_ivar_name(const ID id, const char *name)
595 #define IS_SKIPPED_IVAR(idname) \
596 ((id == idname) && (name = name_##idname, true))
597 if (IS_SKIPPED_IVAR(s_encoding_short)) return name;
598 if (IS_SKIPPED_IVAR(s_ruby2_keywords_flag)) return name;
599 if (IS_SKIPPED_IVAR(s_encoding_long)) return name;
600 if (!rb_id2str(id)) return name;
601 return NULL;
604 struct w_ivar_arg {
605 struct dump_call_arg *dump;
606 st_data_t num_ivar;
609 static int
610 w_obj_each(ID id, VALUE value, st_data_t a)
612 struct w_ivar_arg *ivarg = (struct w_ivar_arg *)a;
613 struct dump_call_arg *arg = ivarg->dump;
614 const char unnamed[] = "", *ivname = skipping_ivar_name(id, unnamed);
616 if (ivname) {
617 if (ivname != unnamed) {
618 rb_warn("instance variable '%s' on class %"PRIsVALUE" is not dumped",
619 ivname, CLASS_OF(arg->obj));
621 return ST_CONTINUE;
623 --ivarg->num_ivar;
624 w_symbol(ID2SYM(id), arg->arg);
625 w_object(value, arg->arg, arg->limit);
626 return ST_CONTINUE;
629 static int
630 obj_count_ivars(ID id, VALUE val, st_data_t a)
632 if (!skipping_ivar_name(id, "") && UNLIKELY(!++*(st_index_t *)a)) {
633 rb_raise(rb_eRuntimeError, "too many instance variables");
635 return ST_CONTINUE;
638 static VALUE
639 encoding_name(VALUE obj, struct dump_arg *arg)
641 if (rb_enc_capable(obj)) {
642 int encidx = rb_enc_get_index(obj);
643 rb_encoding *enc = 0;
644 st_data_t name;
646 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
647 return Qnil;
650 /* special treatment for US-ASCII and UTF-8 */
651 if (encidx == rb_usascii_encindex()) {
652 return Qfalse;
654 else if (encidx == rb_utf8_encindex()) {
655 return Qtrue;
658 if (arg->encodings ?
659 !st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name) :
660 (arg->encodings = st_init_strcasetable(), 1)) {
661 name = (st_data_t)rb_str_new_cstr(rb_enc_name(enc));
662 st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
664 return (VALUE)name;
666 else {
667 return Qnil;
671 static int
672 w_encoding(VALUE encname, struct dump_call_arg *arg)
674 int limit = arg->limit;
675 if (limit >= 0) ++limit;
676 switch (encname) {
677 case Qfalse:
678 case Qtrue:
679 w_symbol(ID2SYM(s_encoding_short), arg->arg);
680 w_object(encname, arg->arg, limit);
681 return 1;
682 case Qnil:
683 return 0;
685 w_symbol(ID2SYM(rb_id_encoding()), arg->arg);
686 w_object(encname, arg->arg, limit);
687 return 1;
690 static st_index_t
691 has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
693 st_index_t num = !NIL_P(encname);
695 if (SPECIAL_CONST_P(obj)) goto generic;
696 switch (BUILTIN_TYPE(obj)) {
697 case T_OBJECT:
698 case T_CLASS:
699 case T_MODULE:
700 break; /* counted elsewhere */
701 case T_HASH:
702 if (rb_hash_ruby2_keywords_p(obj)) ++num;
703 /* fall through */
704 default:
705 generic:
706 rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
707 if (num) *ivobj = obj;
710 return num;
713 static void
714 w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg)
716 shape_id_t shape_id = rb_shape_get_shape_id(arg->obj);
717 struct w_ivar_arg ivarg = {arg, num};
718 if (!num) return;
719 rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg);
721 if (shape_id != rb_shape_get_shape_id(arg->obj)) {
722 rb_shape_t * expected_shape = rb_shape_get_shape_by_id(shape_id);
723 rb_shape_t * actual_shape = rb_shape_get_shape(arg->obj);
725 // If the shape tree got _shorter_ then we probably removed an IV
726 // If the shape tree got longer, then we probably added an IV.
727 // The exception message might not be accurate when someone adds and
728 // removes the same number of IVs, but they will still get an exception
729 if (rb_shape_depth(expected_shape) > rb_shape_depth(actual_shape)) {
730 rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance",
731 CLASS_OF(arg->obj));
733 else {
734 rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance",
735 CLASS_OF(arg->obj));
740 static void
741 w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg)
743 w_long(num, arg->arg);
744 num -= w_encoding(encname, arg);
745 if (RB_TYPE_P(ivobj, T_HASH) && rb_hash_ruby2_keywords_p(ivobj)) {
746 int limit = arg->limit;
747 if (limit >= 0) ++limit;
748 w_symbol(ID2SYM(s_ruby2_keywords_flag), arg->arg);
749 w_object(Qtrue, arg->arg, limit);
750 num--;
752 if (!UNDEF_P(ivobj) && num) {
753 w_ivar_each(ivobj, num, arg);
757 static void
758 w_objivar(VALUE obj, struct dump_call_arg *arg)
760 st_data_t num = 0;
762 rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
763 w_long(num, arg->arg);
764 w_ivar_each(obj, num, arg);
#if SIZEOF_LONG > 4
// Dumps a fixnum that does not fit in 31 bits using the TYPE_BIGNUM layout:
// the tag, a sign byte, the length in shorts, then the magnitude as shorts,
// least significant first.
static void
w_bigfixnum(VALUE obj, struct dump_arg *arg)
{
    RUBY_ASSERT(FIXNUM_P(obj));

    w_byte(TYPE_BIGNUM, arg);

#if SIZEOF_LONG == SIZEOF_VALUE
    long magnitude, rest;
    magnitude = FIX2LONG(obj);
#else
    long long magnitude, rest;
    magnitude = NUM2LL(obj);
#endif

    w_byte(magnitude < 0 ? '-' : '+', arg);

    // Negation cannot overflow: a FIXNUM is one bit narrower than long
    if (magnitude < 0) magnitude = -magnitude;

    // count the shorts needed for the magnitude
    int nshorts = 0;
    for (rest = magnitude; rest; rest = SHORTDN(rest)) {
        nshorts++;
    }

    RUBY_ASSERT(nshorts > 0 && nshorts <= SIZEOF_LONG / 2);

    w_long((long)nshorts, arg);

    for (int k = 0; k < nshorts; k++) {
        w_short(magnitude & SHORTMASK, arg);
        magnitude = SHORTDN(magnitude);
    }

    // The object is not added to the link table, but the index still has to
    // advance.
    arg->num_entries++;

    RUBY_ASSERT(magnitude == 0);
}
#endif
817 static void
818 w_remember(VALUE obj, struct dump_arg *arg)
820 st_add_direct(arg->data, obj, arg->num_entries++);
/*
 * Serializes `obj` (and, recursively, everything it references) into `arg`.
 *
 * `limit` is the remaining nesting depth: 0 raises ArgumentError, a positive
 * value is decremented before descending into a non-immediate object, and a
 * negative value is never decremented.
 */
823 static void
824 w_object(VALUE obj, struct dump_arg *arg, int limit)
826 struct dump_call_arg c_arg;
827 VALUE ivobj = Qundef;
828 st_data_t num;
829 st_index_t hasiv = 0;
830 VALUE encname = Qnil;
832 if (limit == 0) {
833 rb_raise(rb_eArgError, "exceed depth limit");
/* Immediate values: nil, true, false, fixnums and symbols are written without link-table entries here. */
836 if (NIL_P(obj)) {
837 w_byte(TYPE_NIL, arg);
839 else if (obj == Qtrue) {
840 w_byte(TYPE_TRUE, arg);
842 else if (obj == Qfalse) {
843 w_byte(TYPE_FALSE, arg);
845 else if (FIXNUM_P(obj)) {
846 #if SIZEOF_LONG <= 4
847 w_byte(TYPE_FIXNUM, arg);
848 w_long(FIX2INT(obj), arg);
849 #else
/* Only fixnums passing this 31-bit shift test use TYPE_FIXNUM; the rest go through w_bigfixnum(). */
850 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
851 w_byte(TYPE_FIXNUM, arg);
852 w_long(FIX2LONG(obj), arg);
854 else {
855 w_bigfixnum(obj, arg);
857 #endif
859 else if (SYMBOL_P(obj)) {
860 w_symbol(obj, arg);
862 else {
/* An object that was already dumped is written as a TYPE_LINK back-reference. */
863 if (st_lookup(arg->data, obj, &num)) {
864 w_byte(TYPE_LINK, arg);
865 w_long((long)num, arg);
866 return;
869 if (limit > 0) limit--;
870 c_arg.limit = limit;
871 c_arg.arg = arg;
872 c_arg.obj = obj;
/* Flonums are registered and written as floats. */
874 if (FLONUM_P(obj)) {
875 w_remember(obj, arg);
876 w_byte(TYPE_FLOAT, arg);
877 w_float(RFLOAT_VALUE(obj), arg);
878 return;
881 VALUE v;
883 if (!RBASIC_CLASS(obj)) {
884 rb_raise(rb_eTypeError, "can't dump internal %s",
885 rb_builtin_type_name(BUILTIN_TYPE(obj)));
/* User-defined marshal_dump: the object is registered first, then the method's result is dumped after the class header. */
888 if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
889 w_remember(obj, arg);
891 v = dump_funcall(arg, obj, s_mdump, 0, 0);
892 w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
893 w_object(v, arg, limit);
894 return;
/* User-defined _dump(limit): must return a String, which is written as raw bytes. */
896 if (rb_obj_respond_to(obj, s_dump, TRUE)) {
897 VALUE ivobj2 = Qundef;
898 st_index_t hasiv2;
899 VALUE encname2;
/* Reject an object that is reached again while its own _dump ivars are being written. */
901 if (arg->userdefs && st_is_member(arg->userdefs, (st_data_t)obj)) {
902 rb_raise(rb_eRuntimeError, "can't dump recursive object using _dump()");
904 v = INT2NUM(limit);
905 v = dump_funcall(arg, obj, s_dump, 1, &v);
906 if (!RB_TYPE_P(v, T_STRING)) {
907 rb_raise(rb_eTypeError, "_dump() must return string");
/* Ivars and encoding of the returned string take precedence over those of the object itself. */
909 hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
910 hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2);
911 if (hasiv2) {
912 hasiv = hasiv2;
913 ivobj = ivobj2;
914 encname = encname2;
916 if (hasiv) w_byte(TYPE_IVAR, arg);
917 w_class(TYPE_USERDEF, obj, arg, FALSE);
918 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
919 if (hasiv) {
/* Mark obj as "in progress" in userdefs for the duration of the ivar dump. */
920 st_data_t userdefs = (st_data_t)obj;
921 if (!arg->userdefs) {
922 arg->userdefs = rb_init_identtable();
924 st_add_direct(arg->userdefs, userdefs, 0);
925 w_ivar(hasiv, ivobj, encname, &c_arg);
926 st_delete(arg->userdefs, &userdefs, NULL);
/* In this path the object is registered only after it has been written. */
928 w_remember(obj, arg);
929 return;
/* Built-in types: register the object, then dispatch on its type. */
932 w_remember(obj, arg);
934 hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
/* If the class allocator has a compat entry, dump the dumper's result and remember the original object for w_class(). */
936 st_data_t compat_data;
937 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
938 if (st_lookup(compat_allocator_tbl,
939 (st_data_t)allocator,
940 &compat_data)) {
941 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
942 VALUE real_obj = obj;
943 obj = compat->dumper(real_obj);
944 if (!arg->compat_tbl) {
945 arg->compat_tbl = rb_init_identtable();
947 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
948 if (obj != real_obj && UNDEF_P(ivobj)) hasiv = 0;
951 if (hasiv) w_byte(TYPE_IVAR, arg);
953 switch (BUILTIN_TYPE(obj)) {
954 case T_CLASS:
955 if (FL_TEST(obj, FL_SINGLETON)) {
956 rb_raise(rb_eTypeError, "singleton class can't be dumped");
958 w_byte(TYPE_CLASS, arg);
960 VALUE path = class2path(obj);
961 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
962 RB_GC_GUARD(path);
964 break;
966 case T_MODULE:
967 w_byte(TYPE_MODULE, arg);
969 VALUE path = class2path(obj);
970 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
971 RB_GC_GUARD(path);
973 break;
975 case T_FLOAT:
976 w_byte(TYPE_FLOAT, arg);
977 w_float(RFLOAT_VALUE(obj), arg);
978 break;
/* Bignum: sign byte, length in shorts, then the digits split into shorts, least significant first. */
980 case T_BIGNUM:
981 w_byte(TYPE_BIGNUM, arg);
983 char sign = BIGNUM_SIGN(obj) ? '+' : '-';
984 size_t len = BIGNUM_LEN(obj);
985 size_t slen;
986 size_t j;
987 BDIGIT *d = BIGNUM_DIGITS(obj);
989 slen = SHORTLEN(len);
990 if (LONG_MAX < slen) {
991 rb_raise(rb_eTypeError, "too big Bignum can't be dumped");
994 w_byte(sign, arg);
995 w_long((long)slen, arg);
996 for (j = 0; j < len; j++) {
997 #if SIZEOF_BDIGIT > SIZEOF_SHORT
998 BDIGIT num = *d;
999 int i;
1001 for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) {
1002 w_short(num & SHORTMASK, arg);
1003 num = SHORTDN(num);
/* Stop early in the last digit once no bits remain. */
1004 if (j == len - 1 && num == 0) break;
1006 #else
1007 w_short(*d, arg);
1008 #endif
1009 d++;
1012 break;
1014 case T_STRING:
1015 w_uclass(obj, rb_cString, arg);
1016 w_byte(TYPE_STRING, arg);
1017 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
1018 break;
/* Regexp: source bytes followed by one option byte. */
1020 case T_REGEXP:
1021 w_uclass(obj, rb_cRegexp, arg);
1022 w_byte(TYPE_REGEXP, arg);
1024 int opts = rb_reg_options(obj);
1025 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
1026 w_byte((char)opts, arg);
1028 break;
/* Array: the length is written up front, so a length change while dumping elements is an error. */
1030 case T_ARRAY:
1031 w_uclass(obj, rb_cArray, arg);
1032 w_byte(TYPE_ARRAY, arg);
1034 long i, len = RARRAY_LEN(obj);
1036 w_long(len, arg);
1037 for (i=0; i<RARRAY_LEN(obj); i++) {
1038 w_object(RARRAY_AREF(obj, i), arg, limit);
1039 if (len != RARRAY_LEN(obj)) {
1040 rb_raise(rb_eRuntimeError, "array modified during dump");
1044 break;
/* Hash: compare-by-identity hashes get an extra TYPE_UCLASS "Hash" record; a default value selects TYPE_HASH_DEF and is written after the pairs. */
1046 case T_HASH:
1047 w_uclass(obj, rb_cHash, arg);
1048 if (rb_hash_compare_by_id_p(obj)) {
1049 w_byte(TYPE_UCLASS, arg);
1050 w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg);
1052 if (NIL_P(RHASH_IFNONE(obj))) {
1053 w_byte(TYPE_HASH, arg);
1055 else if (FL_TEST(obj, RHASH_PROC_DEFAULT)) {
1056 rb_raise(rb_eTypeError, "can't dump hash with default proc");
1058 else {
1059 w_byte(TYPE_HASH_DEF, arg);
1061 w_long(rb_hash_size_num(obj), arg);
1062 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
1063 if (!NIL_P(RHASH_IFNONE(obj))) {
1064 w_object(RHASH_IFNONE(obj), arg, limit);
1066 break;
/* Struct: member count, then each member name (symbol) with its value. */
1068 case T_STRUCT:
1069 w_class(TYPE_STRUCT, obj, arg, TRUE);
1071 long len = RSTRUCT_LEN(obj);
1072 VALUE mem;
1073 long i;
1075 w_long(len, arg);
1076 mem = rb_struct_members(obj);
1077 for (i=0; i<len; i++) {
1078 w_symbol(RARRAY_AREF(mem, i), arg);
1079 w_object(RSTRUCT_GET(obj, i), arg, limit);
1082 break;
1084 case T_OBJECT:
1085 w_class(TYPE_OBJECT, obj, arg, TRUE);
1086 w_objivar(obj, &c_arg);
1087 break;
/* T_DATA objects can only be dumped through a user-provided _dump_data method. */
1089 case T_DATA:
1091 VALUE v;
1093 if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
1094 rb_raise(rb_eTypeError,
1095 "no _dump_data is defined for class %"PRIsVALUE,
1096 rb_obj_class(obj));
1098 v = dump_funcall(arg, obj, s_dump_data, 0, 0);
1099 w_class(TYPE_DATA, obj, arg, TRUE);
1100 w_object(v, arg, limit);
1102 break;
1104 default:
1105 rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE,
1106 rb_obj_class(obj));
1107 break;
1109 RB_GC_GUARD(obj);
/* Trailing ivar section announced by the TYPE_IVAR byte written before the value. */
1111 if (hasiv) {
1112 w_ivar(hasiv, ivobj, encname, &c_arg);
1116 static void
1117 clear_dump_arg(struct dump_arg *arg)
1119 if (!arg->symbols) return;
1120 st_free_table(arg->symbols);
1121 arg->symbols = 0;
1122 st_free_table(arg->data);
1123 arg->data = 0;
1124 arg->num_entries = 0;
1125 if (arg->compat_tbl) {
1126 st_free_table(arg->compat_tbl);
1127 arg->compat_tbl = 0;
1129 if (arg->encodings) {
1130 st_free_table(arg->encodings);
1131 arg->encodings = 0;
1133 if (arg->userdefs) {
1134 st_free_table(arg->userdefs);
1135 arg->userdefs = 0;
1139 NORETURN(static inline void io_needed(void));
1140 static inline void
1141 io_needed(void)
1143 rb_raise(rb_eTypeError, "instance of IO needed");
1147 * call-seq:
1148 * dump( obj [, anIO] , limit=-1 ) -> anIO
1150 * Serializes obj and all descendant objects. If anIO is
1151 * specified, the serialized data will be written to it, otherwise the
1152 * data will be returned as a String. If limit is specified, the
1153 * traversal of subobjects will be limited to that depth. If limit is
1154 * negative, no checking of depth will be performed.
1156 * class Klass
1157 * def initialize(str)
1158 * @str = str
1159 * end
1160 * def say_hello
1161 * @str
1162 * end
1163 * end
1165 * (produces no output)
1167 * o = Klass.new("hello\n")
1168 * data = Marshal.dump(o)
1169 * obj = Marshal.load(data)
1170 * obj.say_hello #=> "hello\n"
1172 * Marshal can't dump following objects:
1173 * * anonymous Class/Module.
1174 * * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
1175 * and so on)
1176 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1177 * ThreadGroup, Continuation
1178 * * objects which define singleton methods
1180 static VALUE
1181 marshal_dump(int argc, VALUE *argv, VALUE _)
1183 VALUE obj, port, a1, a2;
1184 int limit = -1;
1186 port = Qnil;
1187 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
1188 if (argc == 3) {
1189 if (!NIL_P(a2)) limit = NUM2INT(a2);
1190 if (NIL_P(a1)) io_needed();
1191 port = a1;
1193 else if (argc == 2) {
1194 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
1195 else if (NIL_P(a1)) io_needed();
1196 else port = a1;
1198 return rb_marshal_dump_limited(obj, port, limit);
1201 VALUE
1202 rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
1204 struct dump_arg *arg;
1205 VALUE wrapper; /* used to avoid memory leak in case of exception */
1207 wrapper = TypedData_Make_Struct(0, struct dump_arg, &dump_arg_data, arg);
1208 arg->dest = 0;
1209 arg->symbols = st_init_numtable();
1210 arg->data = rb_init_identtable();
1211 arg->num_entries = 0;
1212 arg->compat_tbl = 0;
1213 arg->encodings = 0;
1214 arg->userdefs = 0;
1215 arg->str = rb_str_buf_new(0);
1216 if (!NIL_P(port)) {
1217 if (!rb_respond_to(port, s_write)) {
1218 io_needed();
1220 arg->dest = port;
1221 dump_check_funcall(arg, port, s_binmode, 0, 0);
1223 else {
1224 port = arg->str;
1227 w_byte(MARSHAL_MAJOR, arg);
1228 w_byte(MARSHAL_MINOR, arg);
1230 w_object(obj, arg, limit);
1231 if (arg->dest) {
1232 rb_io_write(arg->dest, arg->str);
1233 rb_str_resize(arg->str, 0);
1235 clear_dump_arg(arg);
1236 RB_GC_GUARD(wrapper);
1238 return port;
1241 struct load_arg {
1242 VALUE src;
1243 char *buf;
1244 long buflen;
1245 long readable;
1246 long offset;
1247 st_table *symbols;
1248 st_table *data;
1249 st_table *partial_objects;
1250 VALUE proc;
1251 st_table *compat_tbl;
1252 bool freeze;
1255 static VALUE
1256 check_load_arg(VALUE ret, struct load_arg *arg, const char *name)
1258 if (!arg->symbols) {
1259 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
1260 name);
1262 return ret;
/* Calls obj.sym(*argv) and runs the reentrancy check of check_load_arg() on the result; `name_##sym` names the method in the error. */
1264 #define load_funcall(arg, obj, sym, argc, argv) \
1265 check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1267 static void clear_load_arg(struct load_arg *arg);
1269 static void
1270 mark_load_arg(void *ptr)
1272 struct load_arg *p = ptr;
1273 if (!p->symbols)
1274 return;
1275 rb_mark_tbl(p->symbols);
1276 rb_mark_tbl(p->data);
1277 rb_mark_tbl(p->partial_objects);
1278 rb_mark_hash(p->compat_tbl);
/* Free callback for load_arg_data: releases the state via clear_load_arg(). */
static void
free_load_arg(void *ptr)
{
    struct load_arg *load = ptr;

    clear_load_arg(load);
}
1287 static size_t
1288 memsize_load_arg(const void *ptr)
1290 const struct load_arg *p = (struct load_arg *)ptr;
1291 size_t memsize = 0;
1292 if (p->symbols) memsize += rb_st_memsize(p->symbols);
1293 if (p->data) memsize += rb_st_memsize(p->data);
1294 if (p->partial_objects) memsize += rb_st_memsize(p->partial_objects);
1295 if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl);
1296 return memsize;
1299 static const rb_data_type_t load_arg_data = {
1300 "load_arg",
1301 {mark_load_arg, free_load_arg, memsize_load_arg,},
1302 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
/* Calls r_entry0() with the current entry count of arg->data as the index (r_entry0 is defined outside this excerpt); arg is evaluated twice. */
1305 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1306 static VALUE r_object(struct load_arg *arg);
1307 static VALUE r_symbol(struct load_arg *arg);
1309 NORETURN(static void too_short(void));
1310 static void
1311 too_short(void)
1313 rb_raise(rb_eArgError, "marshal data too short");
1316 static st_index_t
1317 r_prepare(struct load_arg *arg)
1319 st_index_t idx = arg->data->num_entries;
1321 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1322 return idx;
1325 static unsigned char
1326 r_byte1_buffered(struct load_arg *arg)
1328 if (arg->buflen == 0) {
1329 long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
1330 VALUE str, n = LONG2NUM(readable);
1332 str = load_funcall(arg, arg->src, s_read, 1, &n);
1333 if (NIL_P(str)) too_short();
1334 StringValue(str);
1335 memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
1336 arg->offset = 0;
1337 arg->buflen = RSTRING_LEN(str);
1339 arg->buflen--;
1340 return arg->buf[arg->offset++];
1343 static int
1344 r_byte(struct load_arg *arg)
1346 int c;
1348 if (RB_TYPE_P(arg->src, T_STRING)) {
1349 if (RSTRING_LEN(arg->src) > arg->offset) {
1350 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1352 else {
1353 too_short();
1356 else {
1357 if (arg->readable >0 || arg->buflen > 0) {
1358 c = r_byte1_buffered(arg);
1360 else {
1361 VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0);
1362 if (NIL_P(v)) rb_eof_error();
1363 c = (unsigned char)NUM2CHR(v);
1366 return c;
1369 NORETURN(static void long_toobig(int size));
1371 static void
1372 long_toobig(int size)
1374 rb_raise(rb_eTypeError, "long too big for this architecture (size "
1375 STRINGIZE(SIZEOF_LONG)", given %d)", size);
/*
 * Decode one marshalled long.
 *
 * The first byte, read as a signed char, selects the form:
 *      0          -> value 0
 *      5..127     -> value is (byte - 5)
 *     -128..-5    -> value is (byte + 5)
 *      1..4 (or up to sizeof(long)) -> that many little-endian bytes follow,
 *                    zero-extended
 *     -1..-4 (ditto) -> that many little-endian bytes follow, with all
 *                    remaining high bits set
 * A byte count larger than sizeof(long) raises TypeError (long_toobig()).
 *
 * The value is assembled in an unsigned long and converted at the end:
 * shifting a byte into the sign bit of a signed long is undefined
 * behaviour, whereas the unsigned shifts are well defined.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long x;
    int c = (signed char)r_byte(arg);
    long i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        x = 0;
        for (i=0;i<c;i++) {
            x |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        x = ~0UL;
        for (i=0;i<c;i++) {
            /* clear byte i, then drop the payload byte into it */
            x &= ~(0xffUL << (8*i));
            x |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)x;
}
1411 long
1412 ruby_marshal_read_long(const char **buf, long len)
1414 long x;
1415 struct RString src;
1416 struct load_arg arg;
1417 memset(&arg, 0, sizeof(arg));
1418 arg.src = rb_setup_fake_str(&src, *buf, len, 0);
1419 x = r_long(&arg);
1420 *buf += arg.offset;
1421 return x;
1424 static VALUE
1425 r_bytes1(long len, struct load_arg *arg)
1427 VALUE str, n = LONG2NUM(len);
1429 str = load_funcall(arg, arg->src, s_read, 1, &n);
1430 if (NIL_P(str)) too_short();
1431 StringValue(str);
1432 if (RSTRING_LEN(str) != len) too_short();
1434 return str;
1437 static VALUE
1438 r_bytes1_buffered(long len, struct load_arg *arg)
1440 VALUE str;
1442 if (len <= arg->buflen) {
1443 str = rb_str_new(arg->buf+arg->offset, len);
1444 arg->offset += len;
1445 arg->buflen -= len;
1447 else {
1448 long buflen = arg->buflen;
1449 long readable = arg->readable + 1;
1450 long tmp_len, read_len, need_len = len - buflen;
1451 VALUE tmp, n;
1453 readable = readable < BUFSIZ ? readable : BUFSIZ;
1454 read_len = need_len > readable ? need_len : readable;
1455 n = LONG2NUM(read_len);
1456 tmp = load_funcall(arg, arg->src, s_read, 1, &n);
1457 if (NIL_P(tmp)) too_short();
1458 StringValue(tmp);
1460 tmp_len = RSTRING_LEN(tmp);
1462 if (tmp_len < need_len) too_short();
1464 str = rb_str_new(arg->buf+arg->offset, buflen);
1465 rb_str_cat(str, RSTRING_PTR(tmp), need_len);
1467 if (tmp_len > need_len) {
1468 buflen = tmp_len - need_len;
1469 memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
1470 arg->buflen = buflen;
1472 else {
1473 arg->buflen = 0;
1475 arg->offset = 0;
1478 return str;
/* Read a length-prefixed byte string: an r_long() length, then that many bytes. */
#define r_bytes(la) r_bytes0(r_long(la), (la))
1483 static VALUE
1484 r_bytes0(long len, struct load_arg *arg)
1486 VALUE str;
1488 if (len == 0) return rb_str_new(0, 0);
1489 if (RB_TYPE_P(arg->src, T_STRING)) {
1490 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1491 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1492 arg->offset += len;
1494 else {
1495 too_short();
1498 else {
1499 if (arg->readable > 0 || arg->buflen > 0) {
1500 str = r_bytes1_buffered(len, arg);
1502 else {
1503 str = r_bytes1(len, arg);
1506 return str;
/*
 * Return non-zero when the +l+ bytes at +p+ equal the +nlen+ bytes at
 * +name+.  Lengths and first bytes are compared before falling back to
 * memcmp() for the remainder.
 */
static inline int
name_equal(const char *name, size_t nlen, const char *p, long l)
{
    if ((size_t)l != nlen) return 0;
    if (p[0] != name[0]) return 0;
    if (nlen == 1) return 1;
    return memcmp(p + 1, name + 1, nlen - 1) == 0;
}
1516 static int
1517 sym2encidx(VALUE sym, VALUE val)
1519 RBIMPL_ATTR_NONSTRING() static const char name_encoding[8] = "encoding";
1520 const char *p;
1521 long l;
1522 if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
1523 RSTRING_GETMEM(sym, p, l);
1524 if (l <= 0) return -1;
1525 if (name_equal(name_encoding, sizeof(name_encoding), p, l)) {
1526 int idx = rb_enc_find_index(StringValueCStr(val));
1527 return idx;
1529 if (name_equal(name_s_encoding_short, rb_strlen_lit(name_s_encoding_short), p, l)) {
1530 if (val == Qfalse) return rb_usascii_encindex();
1531 else if (val == Qtrue) return rb_utf8_encindex();
1532 /* bogus ignore */
1534 return -1;
1537 static int
1538 symname_equal(VALUE sym, const char *name, size_t nlen)
1540 const char *p;
1541 long l;
1542 if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return 0;
1543 RSTRING_GETMEM(sym, p, l);
1544 return name_equal(name, nlen, p, l);
/*
 * BUILD_ASSERT_POSITIVE(n) evaluates to n, via the size of a char array of
 * 2*n-1 elements.  A zero n is turned into a negative array size, to work
 * around the "zero size array" GCC extension (assuming no overflow).
 *
 * symname_equal_lit() compares a symbol name against a string literal,
 * using the macro above on the literal's length.
 */
#define BUILD_ASSERT_POSITIVE(n) \
    ((sizeof(char [2*(ssize_t)(n)-1])+1)/2)
#define symname_equal_lit(sym, sym_name) \
    symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name)))
1553 static VALUE
1554 r_symlink(struct load_arg *arg)
1556 st_data_t sym;
1557 long num = r_long(arg);
1559 if (!st_lookup(arg->symbols, num, &sym)) {
1560 rb_raise(rb_eArgError, "bad symbol");
1562 return (VALUE)sym;
1565 static VALUE
1566 r_symreal(struct load_arg *arg, int ivar)
1568 VALUE s = r_bytes(arg);
1569 VALUE sym;
1570 int idx = -1;
1571 st_index_t n = arg->symbols->num_entries;
1573 if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII);
1574 st_insert(arg->symbols, (st_data_t)n, (st_data_t)s);
1575 if (ivar) {
1576 long num = r_long(arg);
1577 while (num-- > 0) {
1578 sym = r_symbol(arg);
1579 idx = sym2encidx(sym, r_object(arg));
1582 if (idx > 0) {
1583 rb_enc_associate_index(s, idx);
1584 if (is_broken_string(s)) {
1585 rb_raise(rb_eArgError, "invalid byte sequence in %s: %+"PRIsVALUE,
1586 rb_enc_name(rb_enc_from_index(idx)), s);
1590 return s;
1593 static VALUE
1594 r_symbol(struct load_arg *arg)
1596 int type, ivar = 0;
1598 again:
1599 switch ((type = r_byte(arg))) {
1600 default:
1601 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1602 case TYPE_IVAR:
1603 ivar = 1;
1604 goto again;
1605 case TYPE_SYMBOL:
1606 return r_symreal(arg, ivar);
1607 case TYPE_SYMLINK:
1608 if (ivar) {
1609 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1611 return r_symlink(arg);
1615 static VALUE
1616 r_unique(struct load_arg *arg)
1618 return r_symbol(arg);
1621 static VALUE
1622 r_string(struct load_arg *arg)
1624 return r_bytes(arg);
1627 static VALUE
1628 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
1630 st_data_t real_obj = (st_data_t)v;
1631 if (arg->compat_tbl) {
1632 /* real_obj is kept if not found */
1633 st_lookup(arg->compat_tbl, v, &real_obj);
1635 st_insert(arg->data, num, real_obj);
1636 st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue);
1637 return v;
1640 static VALUE
1641 r_fixup_compat(VALUE v, struct load_arg *arg)
1643 st_data_t data;
1644 st_data_t key = (st_data_t)v;
1645 if (arg->compat_tbl && st_delete(arg->compat_tbl, &key, &data)) {
1646 VALUE real_obj = (VALUE)data;
1647 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1648 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1649 marshal_compat_t *compat = (marshal_compat_t*)data;
1650 compat->loader(real_obj, v);
1652 v = real_obj;
1654 return v;
1657 static VALUE
1658 r_post_proc(VALUE v, struct load_arg *arg)
1660 if (arg->proc) {
1661 v = load_funcall(arg, arg->proc, s_call, 1, &v);
1663 return v;
1666 static VALUE
1667 r_leave(VALUE v, struct load_arg *arg, bool partial)
1669 v = r_fixup_compat(v, arg);
1670 if (!partial) {
1671 st_data_t data;
1672 st_data_t key = (st_data_t)v;
1673 st_delete(arg->partial_objects, &key, &data);
1674 if (arg->freeze) {
1675 if (RB_TYPE_P(v, T_MODULE) || RB_TYPE_P(v, T_CLASS)) {
1676 // noop
1678 else if (RB_TYPE_P(v, T_STRING)) {
1679 v = rb_str_to_interned_str(v);
1681 else {
1682 OBJ_FREEZE(v);
1685 v = r_post_proc(v, arg);
1687 return v;
1690 static int
1691 copy_ivar_i(ID vid, VALUE value, st_data_t arg)
1693 VALUE obj = (VALUE)arg;
1695 if (!rb_ivar_defined(obj, vid))
1696 rb_ivar_set(obj, vid, value);
1697 return ST_CONTINUE;
1700 static VALUE
1701 r_copy_ivar(VALUE v, VALUE data)
1703 rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
1704 return v;
/* Raise TypeError for an attempt to set instance variables on a class or
 * module; +kind+ must be a string literal, +name+ the offending name. */
#define override_ivar_error(kind, name) \
    rb_raise(rb_eTypeError, \
             "can't override instance variable of "kind" '%"PRIsVALUE"'", \
             (name))
1712 static void
1713 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1715 long len;
1717 len = r_long(arg);
1718 if (len > 0) {
1719 if (RB_TYPE_P(obj, T_MODULE)) {
1720 override_ivar_error("module", rb_mod_name(obj));
1722 else if (RB_TYPE_P(obj, T_CLASS)) {
1723 override_ivar_error("class", rb_class_name(obj));
1725 do {
1726 VALUE sym = r_symbol(arg);
1727 VALUE val = r_object(arg);
1728 int idx = sym2encidx(sym, val);
1729 if (idx >= 0) {
1730 if (rb_enc_capable(obj)) {
1731 rb_enc_associate_index(obj, idx);
1733 else {
1734 rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1736 if (has_encoding) *has_encoding = TRUE;
1738 else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
1739 if (RB_TYPE_P(obj, T_HASH)) {
1740 rb_hash_ruby2_keywords(obj);
1742 else {
1743 rb_raise(rb_eArgError, "ruby2_keywords flag is given but %"PRIsVALUE" is not a Hash", obj);
1746 else {
1747 rb_ivar_set(obj, rb_intern_str(sym), val);
1749 } while (--len > 0);
1753 static VALUE
1754 path2class(VALUE path)
1756 VALUE v = rb_path_to_class(path);
1758 if (!RB_TYPE_P(v, T_CLASS)) {
1759 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
1761 return v;
/* Resolve a path and require the result to be a Module (not a Class). */
#define path2module(p) must_be_module(rb_path_to_class(p), p)
1766 static VALUE
1767 must_be_module(VALUE v, VALUE path)
1769 if (!RB_TYPE_P(v, T_MODULE)) {
1770 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
1772 return v;
1775 static VALUE
1776 obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
1778 st_data_t data;
1779 rb_alloc_func_t allocator;
1781 allocator = rb_get_alloc_func(klass);
1782 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1783 marshal_compat_t *compat = (marshal_compat_t*)data;
1784 VALUE real_obj = rb_obj_alloc(klass);
1785 VALUE obj = rb_obj_alloc(compat->oldclass);
1786 if (oldclass) *oldclass = compat->oldclass;
1788 if (!arg->compat_tbl) {
1789 arg->compat_tbl = rb_init_identtable();
1791 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1792 return obj;
1795 return rb_obj_alloc(klass);
1798 static VALUE
1799 obj_alloc_by_path(VALUE path, struct load_arg *arg)
1801 return obj_alloc_by_klass(path2class(path), arg, 0);
1804 static VALUE
1805 append_extmod(VALUE obj, VALUE extmod)
1807 long i = RARRAY_LEN(extmod);
1808 while (i > 0) {
1809 VALUE m = RARRAY_AREF(extmod, --i);
1810 rb_extend_object(obj, m);
1812 return obj;
/* Raise the "can't override instance variable" TypeError when the enclosing
 * function's `ivp` flag is present and set; otherwise do nothing. */
#define prohibit_ivar(kind, name) do { \
    if (ivp && *ivp) { \
        override_ivar_error(kind, name); \
    } \
} while (0)
1820 static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type);
1822 static VALUE
1823 r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
1825 int type = r_byte(arg);
1826 return r_object_for(arg, partial, ivp, extmod, type);
/*
 * Decode one object whose type byte (+type+) has already been read.
 *
 *   arg     - loader state
 *   partial - forwarded to r_leave(); when true, r_leave() skips the
 *             partial-object removal, freezing and user proc
 *   ivp     - when non-NULL, points at a flag telling the callee that
 *             instance variables follow; branches that consume them
 *             themselves reset it to FALSE
 *   extmod  - Qnil, or a hidden array collecting the modules named by
 *             TYPE_EXTENDED records wrapping this object
 *
 * Returns the decoded object.  Raises ArgumentError or TypeError on
 * malformed or incompatible data, including when the result is Qundef
 * (a link to a slot that was reserved but not yet filled).
 */
1829 static VALUE
1830 r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type)
1832 VALUE (*hash_new_with_size)(st_index_t) = rb_hash_new_with_size;
1833 VALUE v = Qnil;
1834 long id;
1835 st_data_t link;
1837 switch (type) {
/* Back-reference to an already registered object; the proc is only applied
 * when the target is no longer listed in partial_objects. */
1838 case TYPE_LINK:
1839 id = r_long(arg);
1840 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
1841 rb_raise(rb_eArgError, "dump format error (unlinked)");
1843 v = (VALUE)link;
1844 if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
1845 v = r_post_proc(v, arg);
1847 break;
/* Object followed by instance variables, unless the inner decoder cleared
 * the flag because it consumed them itself. */
1849 case TYPE_IVAR:
1851 int ivar = TRUE;
1852 v = r_object0(arg, true, &ivar, extmod);
1853 if (ivar) r_ivar(v, NULL, arg);
1854 v = r_leave(v, arg, partial);
1856 break;
/* Object extended by a module, or (when the path names a class) an object
 * whose singleton class had modules prepended. */
1858 case TYPE_EXTENDED:
1860 VALUE path = r_unique(arg);
1861 VALUE m = rb_path_to_class(path);
1862 if (NIL_P(extmod)) extmod = rb_ary_hidden_new(0);
1864 if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
1865 VALUE c;
1867 v = r_object0(arg, true, 0, Qnil);
1868 c = CLASS_OF(v);
1869 if (c != m || FL_TEST(c, FL_SINGLETON)) {
1870 rb_raise(rb_eArgError,
1871 "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
1872 path, rb_class_name(c));
1874 c = rb_singleton_class(v);
1875 while (RARRAY_LEN(extmod) > 0) {
1876 m = rb_ary_pop(extmod);
1877 rb_prepend_module(c, m);
1880 else {
1881 must_be_module(m, path);
1882 rb_ary_push(extmod, m);
1884 v = r_object0(arg, true, 0, extmod);
1885 while (RARRAY_LEN(extmod) > 0) {
1886 m = rb_ary_pop(extmod);
1887 rb_extend_object(v, m);
1890 v = r_leave(v, arg, partial);
1892 break;
/* Instance of a user-defined subclass of a built-in type: decode the inner
 * object, then re-class it to the named class. */
1894 case TYPE_UCLASS:
1896 VALUE c = path2class(r_unique(arg));
1898 if (FL_TEST(c, FL_SINGLETON)) {
1899 rb_raise(rb_eTypeError, "singleton can't be loaded");
1901 type = r_byte(arg);
1902 if ((c == rb_cHash) &&
1903 /* Hack for compare_by_identify */
1904 (type == TYPE_HASH || type == TYPE_HASH_DEF)) {
1905 hash_new_with_size = rb_ident_hash_new_with_size;
1906 goto type_hash;
1908 v = r_object_for(arg, partial, 0, extmod, type);
1909 if (RB_SPECIAL_CONST_P(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
1910 goto format_error;
1912 if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
1913 VALUE tmp = rb_obj_alloc(c);
1915 if (TYPE(v) != TYPE(tmp)) goto format_error;
1917 RBASIC_SET_CLASS(v, c);
1919 break;
1921 format_error:
1922 rb_raise(rb_eArgError, "dump format error (user class)");
1924 case TYPE_NIL:
1925 v = Qnil;
1926 v = r_leave(v, arg, false);
1927 break;
1929 case TYPE_TRUE:
1930 v = Qtrue;
1931 v = r_leave(v, arg, false);
1932 break;
1934 case TYPE_FALSE:
1935 v = Qfalse;
1936 v = r_leave(v, arg, false);
1937 break;
1939 case TYPE_FIXNUM:
1941 long i = r_long(arg);
1942 v = LONG2FIX(i);
1944 v = r_leave(v, arg, false);
1945 break;
/* Float stored as text: "nan", "inf", "-inf", or a decimal string parsed by
 * strtod() with the remaining bytes handed to load_mantissa(). */
1947 case TYPE_FLOAT:
1949 double d;
1950 VALUE str = r_bytes(arg);
1951 const char *ptr = RSTRING_PTR(str);
1953 if (strcmp(ptr, "nan") == 0) {
1954 d = nan("");
1956 else if (strcmp(ptr, "inf") == 0) {
1957 d = HUGE_VAL;
1959 else if (strcmp(ptr, "-inf") == 0) {
1960 d = -HUGE_VAL;
1962 else {
1963 char *e;
1964 d = strtod(ptr, &e);
1965 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
1967 v = DBL2NUM(d);
1968 v = r_entry(v, arg);
1969 v = r_leave(v, arg, false);
1971 break;
/* Sign byte, then a count of 16-bit little-endian units.  With a 64-bit
 * VALUE, up to four units are assembled directly into an integer. */
1973 case TYPE_BIGNUM:
1975 long len;
1976 VALUE data;
1977 int sign;
1979 sign = r_byte(arg);
1980 len = r_long(arg);
1982 if (SIZEOF_VALUE >= 8 && len <= 4) {
1983 // Representable within uintptr, likely FIXNUM
1984 VALUE num = 0;
1985 for (int i = 0; i < len; i++) {
1986 num |= (VALUE)r_byte(arg) << (i * 16);
1987 num |= (VALUE)r_byte(arg) << (i * 16 + 8);
1989 #if SIZEOF_VALUE == SIZEOF_LONG
1990 v = ULONG2NUM(num);
1991 #else
1992 v = ULL2NUM(num);
1993 #endif
1994 if (sign == '-') {
1995 v = rb_int_uminus(v);
1998 else {
1999 data = r_bytes0(len * 2, arg);
2000 v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
2001 INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
2002 rb_str_resize(data, 0L);
2004 v = r_entry(v, arg);
2005 v = r_leave(v, arg, false);
2007 break;
2009 case TYPE_STRING:
2010 v = r_entry(r_string(arg), arg);
2011 v = r_leave(v, arg, partial);
2012 break;
/* Source bytes plus an options byte.  The data slot is reserved before the
 * instance variables are read and filled once the Regexp exists. */
2014 case TYPE_REGEXP:
2016 VALUE str = r_bytes(arg);
2017 int options = r_byte(arg);
2018 int has_encoding = FALSE;
2019 st_index_t idx = r_prepare(arg);
2021 if (ivp) {
2022 r_ivar(str, &has_encoding, arg);
2023 *ivp = FALSE;
2025 if (!has_encoding) {
2026 /* 1.8 compatibility; remove escapes undefined in 1.8 */
2027 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
2028 long len = RSTRING_LEN(str);
2029 long bs = 0;
2030 for (; len-- > 0; *dst++ = *src++) {
2031 switch (*src) {
2032 case '\\': bs++; break;
2033 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2034 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
2035 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
2036 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
2037 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
2038 if (bs & 1) --dst;
2039 /* fall through */
2040 default: bs = 0; break;
2043 rb_str_set_len(str, dst - ptr);
2045 VALUE regexp = rb_reg_new_str(str, options);
2046 r_copy_ivar(regexp, str);
2048 v = r_entry0(regexp, idx, arg);
2049 v = r_leave(v, arg, partial);
2051 break;
/* Element count followed by that many objects.  arg->readable is raised by
 * len - 1 up front, lowered by one per element and raised by one at the
 * end, so it finishes at its starting value. */
2053 case TYPE_ARRAY:
2055 long len = r_long(arg);
2057 v = rb_ary_new2(len);
2058 v = r_entry(v, arg);
2059 arg->readable += len - 1;
2060 while (len--) {
2061 rb_ary_push(v, r_object(arg));
2062 arg->readable--;
2064 v = r_leave(v, arg, partial);
2065 arg->readable++;
2067 break;
/* Pair count followed by key/value objects; TYPE_HASH_DEF additionally
 * carries a default value.  Also entered via goto from TYPE_UCLASS. */
2069 case TYPE_HASH:
2070 case TYPE_HASH_DEF:
2071 type_hash:
2073 long len = r_long(arg);
2075 v = hash_new_with_size(len);
2076 v = r_entry(v, arg);
2077 arg->readable += (len - 1) * 2;
2078 while (len--) {
2079 VALUE key = r_object(arg);
2080 VALUE value = r_object(arg);
2081 rb_hash_aset(v, key, value);
2082 arg->readable -= 2;
2084 arg->readable += 2;
2085 if (type == TYPE_HASH_DEF) {
2086 RHASH_SET_IFNONE(v, r_object(arg));
2088 v = r_leave(v, arg, partial);
2090 break;
/* Class name, member count, then (member name, value) pairs, which must
 * match the struct's declared members in number, order and name. */
2092 case TYPE_STRUCT:
2094 VALUE mem, values;
2095 long i;
2096 VALUE slot;
2097 st_index_t idx = r_prepare(arg);
2098 VALUE klass = path2class(r_unique(arg));
2099 long len = r_long(arg);
2101 v = rb_obj_alloc(klass);
2102 if (!RB_TYPE_P(v, T_STRUCT)) {
2103 rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass));
2105 mem = rb_struct_s_members(klass);
2106 if (RARRAY_LEN(mem) != len) {
2107 rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)",
2108 rb_class_name(klass));
2111 arg->readable += (len - 1) * 2;
2112 v = r_entry0(v, idx, arg);
2113 values = rb_ary_new2(len);
2115 VALUE keywords = Qfalse;
2116 if (RTEST(rb_struct_s_keyword_init(klass))) {
2117 keywords = rb_hash_new();
2118 rb_ary_push(values, keywords);
2121 for (i=0; i<len; i++) {
2122 VALUE n = rb_sym2str(RARRAY_AREF(mem, i));
2123 slot = r_symbol(arg);
2125 if (!rb_str_equal(n, slot)) {
2126 rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")",
2127 rb_class_name(klass),
2128 slot, n);
2130 if (keywords) {
2131 rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg));
2133 else {
2134 rb_ary_push(values, r_object(arg));
2136 arg->readable -= 2;
2139 rb_struct_initialize(v, values);
2140 v = r_leave(v, arg, partial);
2141 arg->readable += 2;
2143 break;
/* Class name plus a string that is passed to the class's _load method. */
2145 case TYPE_USERDEF:
2147 VALUE name = r_unique(arg);
2148 VALUE klass = path2class(name);
2149 VALUE data;
2150 st_data_t d;
2152 if (!rb_obj_respond_to(klass, s_load, TRUE)) {
2153 rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method '_load'",
2154 name);
2156 data = r_string(arg);
2157 if (ivp) {
2158 r_ivar(data, NULL, arg);
2159 *ivp = FALSE;
2161 v = load_funcall(arg, klass, s_load, 1, &data);
2162 v = r_entry(v, arg);
2163 if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
2164 marshal_compat_t *compat = (marshal_compat_t*)d;
2165 v = compat->loader(klass, v);
2167 if (!partial) {
2168 if (arg->freeze) {
2169 OBJ_FREEZE(v);
2171 v = r_post_proc(v, arg);
2174 break;
/* Class name plus an arbitrary object that is passed to the freshly
 * allocated instance's marshal_load method. */
2176 case TYPE_USRMARSHAL:
2178 VALUE name = r_unique(arg);
2179 VALUE klass = path2class(name);
2180 VALUE oldclass = 0;
2181 VALUE data;
2183 v = obj_alloc_by_klass(klass, arg, &oldclass);
2184 if (!NIL_P(extmod)) {
2185 /* for the case marshal_load is overridden */
2186 append_extmod(v, extmod);
2188 if (!rb_obj_respond_to(v, s_mload, TRUE)) {
2189 rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method 'marshal_load'",
2190 name);
2192 v = r_entry(v, arg);
2193 data = r_object(arg);
2194 load_funcall(arg, v, s_mload, 1, &data);
2195 v = r_fixup_compat(v, arg);
2196 v = r_copy_ivar(v, data);
2197 if (arg->freeze) {
2198 OBJ_FREEZE(v);
2200 v = r_post_proc(v, arg);
2201 if (!NIL_P(extmod)) {
2202 if (oldclass) append_extmod(v, extmod);
2203 rb_ary_clear(extmod);
2206 break;
/* Plain object: class name followed by its instance variables. */
2208 case TYPE_OBJECT:
2210 st_index_t idx = r_prepare(arg);
2211 v = obj_alloc_by_path(r_unique(arg), arg);
2212 if (!RB_TYPE_P(v, T_OBJECT)) {
2213 rb_raise(rb_eArgError, "dump format error");
2215 v = r_entry0(v, idx, arg);
2216 r_ivar(v, NULL, arg);
2217 v = r_leave(v, arg, partial);
2219 break;
/* T_DATA object: class name plus an object that is passed to the
 * instance's _load_data method. */
2221 case TYPE_DATA:
2223 VALUE name = r_unique(arg);
2224 VALUE klass = path2class(name);
2225 VALUE oldclass = 0;
2226 VALUE r;
2228 v = obj_alloc_by_klass(klass, arg, &oldclass);
2229 if (!RB_TYPE_P(v, T_DATA)) {
2230 rb_raise(rb_eArgError, "dump format error");
2232 v = r_entry(v, arg);
2233 if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
2234 rb_raise(rb_eTypeError,
2235 "class %"PRIsVALUE" needs to have instance method '_load_data'",
2236 name);
2238 r = r_object0(arg, partial, 0, extmod);
2239 load_funcall(arg, v, s_load_data, 1, &r);
2240 v = r_leave(v, arg, partial);
2242 break;
/* Class or module referenced by path, with no check on which of the two it
 * is; instance variables are rejected. */
2244 case TYPE_MODULE_OLD:
2246 VALUE str = r_bytes(arg);
2248 v = rb_path_to_class(str);
2249 prohibit_ivar("class/module", str);
2250 v = r_entry(v, arg);
2251 v = r_leave(v, arg, partial);
2253 break;
2255 case TYPE_CLASS:
2257 VALUE str = r_bytes(arg);
2259 v = path2class(str);
2260 prohibit_ivar("class", str);
2261 v = r_entry(v, arg);
2262 v = r_leave(v, arg, partial);
2264 break;
2266 case TYPE_MODULE:
2268 VALUE str = r_bytes(arg);
2270 v = path2module(str);
2271 prohibit_ivar("module", str);
2272 v = r_entry(v, arg);
2273 v = r_leave(v, arg, partial);
2275 break;
/* Inline symbol; any instance variables (encoding) are consumed by
 * r_symreal(), so the caller's flag is cleared. */
2277 case TYPE_SYMBOL:
2278 if (ivp) {
2279 v = r_symreal(arg, *ivp);
2280 *ivp = FALSE;
2282 else {
2283 v = r_symreal(arg, 0);
2285 v = rb_str_intern(v);
2286 v = r_leave(v, arg, partial);
2287 break;
2289 case TYPE_SYMLINK:
2290 v = rb_str_intern(r_symlink(arg));
2291 break;
2293 default:
2294 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
2295 break;
/* Qundef is the placeholder stored by r_prepare(); getting it back means a
 * link pointed at a slot that has not been filled in yet. */
2298 if (UNDEF_P(v)) {
2299 rb_raise(rb_eArgError, "dump format error (bad link)");
2302 return v;
2305 static VALUE
2306 r_object(struct load_arg *arg)
2308 return r_object0(arg, false, 0, Qnil);
2311 static void
2312 clear_load_arg(struct load_arg *arg)
2314 xfree(arg->buf);
2315 arg->buf = NULL;
2316 arg->buflen = 0;
2317 arg->offset = 0;
2318 arg->readable = 0;
2319 if (!arg->symbols) return;
2320 st_free_table(arg->symbols);
2321 arg->symbols = 0;
2322 st_free_table(arg->data);
2323 arg->data = 0;
2324 st_free_table(arg->partial_objects);
2325 arg->partial_objects = 0;
2326 if (arg->compat_tbl) {
2327 st_free_table(arg->compat_tbl);
2328 arg->compat_tbl = 0;
2332 VALUE
2333 rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze)
2335 int major, minor;
2336 VALUE v;
2337 VALUE wrapper; /* used to avoid memory leak in case of exception */
2338 struct load_arg *arg;
2340 v = rb_check_string_type(port);
2341 if (!NIL_P(v)) {
2342 port = v;
2344 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
2345 rb_check_funcall(port, s_binmode, 0, 0);
2347 else {
2348 io_needed();
2350 wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg);
2351 arg->src = port;
2352 arg->offset = 0;
2353 arg->symbols = st_init_numtable();
2354 arg->data = rb_init_identtable();
2355 arg->partial_objects = rb_init_identtable();
2356 arg->compat_tbl = 0;
2357 arg->proc = 0;
2358 arg->readable = 0;
2359 arg->freeze = freeze;
2361 if (NIL_P(v))
2362 arg->buf = xmalloc(BUFSIZ);
2363 else
2364 arg->buf = 0;
2366 major = r_byte(arg);
2367 minor = r_byte(arg);
2368 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
2369 clear_load_arg(arg);
2370 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
2371 \tformat version %d.%d required; %d.%d given",
2372 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2374 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
2375 rb_warn("incompatible marshal file format (can be read)\n\
2376 \tformat version %d.%d required; %d.%d given",
2377 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2380 if (!NIL_P(proc)) arg->proc = proc;
2381 v = r_object(arg);
2382 clear_load_arg(arg);
2383 RB_GC_GUARD(wrapper);
2385 return v;
2388 static VALUE
2389 marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze)
2391 return rb_marshal_load_with_proc(source, proc, RTEST(freeze));
2394 #include "marshal.rbinc"
2397 * The marshaling library converts collections of Ruby objects into a
2398 * byte stream, allowing them to be stored outside the currently
2399 * active script. This data may subsequently be read and the original
2400 * objects reconstituted.
2402 * Marshaled data has major and minor version numbers stored along
2403 * with the object information. In normal use, marshaling can only
2404 * load data written with the same major version number and an equal
2405 * or lower minor version number. If Ruby's ``verbose'' flag is set
2406 * (normally using -d, -v, -w, or --verbose), a warning is emitted
2407 * when the minor numbers differ. Marshal versioning is independent of
2408 * Ruby's version numbers. You can extract the version by reading the
2409 * first two bytes of marshaled data.
2411 * str = Marshal.dump("thing")
2412 * RUBY_VERSION #=> "1.9.0"
2413 * str[0].ord #=> 4
2414 * str[1].ord #=> 8
2416 * Some objects cannot be dumped: if the objects to be dumped include
2417 * bindings, procedure or method objects, instances of class IO, or
2418 * singleton objects, a TypeError will be raised.
2420 * If your class has special serialization needs (for example, if you
2421 * want to serialize in some specific format), or if it contains
2422 * objects that would otherwise not be serializable, you can implement
2423 * your own serialization strategy.
2425 * There are two methods of doing this: your object can define either
2426 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2427 * precedence over _dump if both are defined. marshal_dump may result in
2428 * smaller Marshal strings.
2430 * == Security considerations
2432 * By design, Marshal.load can deserialize almost any class loaded into the
2433 * Ruby process. In many cases this can lead to remote code execution if the
2434 * Marshal data is loaded from an untrusted source.
2436 * As a result, Marshal.load is not suitable as a general purpose serialization
2437 * format and you should never unmarshal user supplied input or other untrusted
2438 * data.
2440 * If you need to deserialize untrusted data, use JSON or another serialization
2441 * format that is only able to load simple, 'primitive' types such as String,
2442 * Array, Hash, etc. Never allow user input to specify arbitrary types to
2443 * deserialize into.
2445 * == marshal_dump and marshal_load
2447 * When dumping an object the method marshal_dump will be called.
2448 * marshal_dump must return a result containing the information necessary for
2449 * marshal_load to reconstitute the object. The result can be any object.
2451 * When loading an object dumped using marshal_dump the object is first
2452 * allocated then marshal_load is called with the result from marshal_dump.
2453 * marshal_load must recreate the object from the information in the result.
2455 * Example:
2457 * class MyObj
2458 * def initialize name, version, data
2459 * @name = name
2460 * @version = version
2461 * @data = data
2462 * end
2464 * def marshal_dump
2465 * [@name, @version]
2466 * end
2468 * def marshal_load array
2469 * @name, @version = array
2470 * end
2471 * end
2473 * == _dump and _load
2475 * Use _dump and _load when you need to allocate the object you're restoring
2476 * yourself.
2478 * When dumping an object the instance method _dump is called with an Integer
2479 * which indicates the maximum depth of objects to dump (a value of -1 implies
2480 * that you should disable depth checking). _dump must return a String
2481 * containing the information necessary to reconstitute the object.
2483 * The class method _load should take a String and use it to return an object
2484 * of the same class.
2486 * Example:
2488 * class MyObj
2489 * def initialize name, version, data
2490 * @name = name
2491 * @version = version
2492 * @data = data
2493 * end
2495 * def _dump level
2496 * [@name, @version].join ':'
2497 * end
2499 * def self._load args
2500 * new(*args.split(':'))
2501 * end
2502 * end
2504 * Since Marshal.dump outputs a string you can have _dump return a Marshal
2505 * string which is Marshal.loaded in _load for complex objects.
2507 void
2508 Init_marshal(void)
2510 VALUE rb_mMarshal = rb_define_module("Marshal");
2511 #define set_id(sym) sym = rb_intern_const(name_##sym)
2512 set_id(s_dump);
2513 set_id(s_load);
2514 set_id(s_mdump);
2515 set_id(s_mload);
2516 set_id(s_dump_data);
2517 set_id(s_load_data);
2518 set_id(s_alloc);
2519 set_id(s_call);
2520 set_id(s_getbyte);
2521 set_id(s_read);
2522 set_id(s_write);
2523 set_id(s_binmode);
2524 set_id(s_encoding_short);
2525 set_id(s_ruby2_keywords_flag);
2527 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
2529 /* major version */
2530 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
2531 /* minor version */
2532 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
2535 static int
2536 marshal_compat_table_mark_i(st_data_t key, st_data_t value, st_data_t _)
2538 marshal_compat_t *p = (marshal_compat_t *)value;
2539 rb_gc_mark_movable(p->newclass);
2540 rb_gc_mark_movable(p->oldclass);
2541 return ST_CONTINUE;
2544 static void
2545 marshal_compat_table_mark(void *tbl)
2547 if (!tbl) return;
2548 st_foreach(tbl, marshal_compat_table_mark_i, 0);
2551 static int
2552 marshal_compat_table_free_i(st_data_t key, st_data_t value, st_data_t _)
2554 xfree((marshal_compat_t *)value);
2555 return ST_CONTINUE;
2558 static void
2559 marshal_compat_table_free(void *data)
2561 st_foreach(data, marshal_compat_table_free_i, 0);
2562 st_free_table(data);
2565 static size_t
2566 marshal_compat_table_memsize(const void *data)
2568 return st_memsize(data) + sizeof(marshal_compat_t) * st_table_size(data);
2571 static int
2572 marshal_compat_table_compact_i(st_data_t key, st_data_t value, st_data_t _)
2574 marshal_compat_t *p = (marshal_compat_t *)value;
2575 p->newclass = rb_gc_location(p->newclass);
2576 p->oldclass = rb_gc_location(p->oldclass);
2577 return ST_CONTINUE;
2580 static void
2581 marshal_compat_table_compact(void *tbl)
2583 if (!tbl) return;
2584 st_foreach(tbl, marshal_compat_table_compact_i, 0);
2587 static const rb_data_type_t marshal_compat_type = {
2588 .wrap_struct_name = "marshal_compat_table",
2589 .function = {
2590 .dmark = marshal_compat_table_mark,
2591 .dfree = marshal_compat_table_free,
2592 .dsize = marshal_compat_table_memsize,
2593 .dcompact = marshal_compat_table_compact,
2595 .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY,
2598 static st_table *
2599 compat_allocator_table(void)
2601 if (compat_allocator_tbl) return compat_allocator_tbl;
2602 compat_allocator_tbl = st_init_numtable();
2603 compat_allocator_tbl_wrapper =
2604 TypedData_Wrap_Struct(0, &marshal_compat_type, compat_allocator_tbl);
2605 rb_vm_register_global_object(compat_allocator_tbl_wrapper);
2606 return compat_allocator_tbl;
2609 VALUE
2610 rb_marshal_dump(VALUE obj, VALUE port)
2612 return rb_marshal_dump_limited(obj, port, -1);
2615 VALUE
2616 rb_marshal_load(VALUE port)
2618 return rb_marshal_load_with_proc(port, Qnil, false);