[ruby/psych] Add support for ruby 3.2 Data objects
[ruby.git] / pack.c
blob3a5c1bfb9677cfa01efb07f634063510939941e7
1 /**********************************************************************
3 pack.c -
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
14 #include <ctype.h>
15 #include <errno.h>
16 #include <float.h>
17 #include <sys/types.h>
19 #include "internal.h"
20 #include "internal/array.h"
21 #include "internal/bits.h"
22 #include "internal/string.h"
23 #include "internal/symbol.h"
24 #include "internal/variable.h"
25 #include "ruby/util.h"
27 #include "builtin.h"
30 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
31 * instead of HAVE_LONG_LONG or LONG_LONG.
32 * This means that q! and Q! always mean the standard long long type and
33 * cause ArgumentError on platforms which have no long long type,
34 * even if the platform has an implementation-specific 64-bit type.
35 * This behavior is consistent with the documentation of pack/unpack.
/* natstr: directive letters that pack_modifiers() lets '_' or '!' follow. */
/* endstr: directive letters that pack_modifiers() lets '<' or '>' follow. */
37 #ifdef HAVE_TRUE_LONG_LONG
38 static const char natstr[] = "sSiIlLqQjJ";
/* With a true long long both modifier kinds apply to the same letters. */
39 # define endstr natstr
40 #else
/* Without a true long long, 'q'/'Q' keep the byte-order modifiers only. */
41 static const char natstr[] = "sSiIlLjJ";
42 static const char endstr[] = "sSiIlLqQjJ";
43 #endif
/* NATINT_LEN_Q: byte width used for the 'q'/'Q' directives. */
45 #ifdef HAVE_TRUE_LONG_LONG
46 /* It is intentional to use long long instead of LONG_LONG. */
47 # define NATINT_LEN_Q NATINT_LEN(long long, 8)
48 #else
49 # define NATINT_LEN_Q 8
50 #endif
/*
 * NATINT_PACK is defined when short, long or long long does not have the
 * fixed width (2, 4, 8 bytes) tested below; it switches NATINT_LEN and
 * pack_modifiers over to tracking a `natint` flag.
 */
52 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
53 # define NATINT_PACK
54 #endif
56 #ifdef DYNAMIC_ENDIAN
57 /* for universal binary of NEXTSTEP and MacOS X */
58 /* useless since autoconf 2.63? */
/*
 * Reports whether the running machine stores integers big-endian.
 *
 * The answer is probed once, by looking at the first byte of an int that
 * holds 1, and cached in function-local statics for later calls.
 * Returns 1 for big-endian, 0 otherwise.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int endian_value;

    if (!probed) {
        const char *first_byte;

        probed = 1;
        /* `probed` now holds 1: a non-zero first byte means little-endian */
        first_byte = (const char *)&probed;
        endian_value = first_byte[0] ? 0 : 1;
    }
    return endian_value;
}
/*
 * BIGENDIAN_P() is non-zero when the host byte order is big-endian: probed
 * at run time under DYNAMIC_ENDIAN, otherwise fixed at compile time by
 * WORDS_BIGENDIAN.
 */
71 # define BIGENDIAN_P() (is_bigendian())
72 #elif defined(WORDS_BIGENDIAN)
73 # define BIGENDIAN_P() 1
74 #else
75 # define BIGENDIAN_P() 0
76 #endif
/*
 * NATINT_LEN(type,len): byte width of an integer directive.  Under
 * NATINT_PACK it is sizeof(type) when the `natint` variable in scope at the
 * expansion site is set and `len` otherwise; without NATINT_PACK it is
 * always sizeof(type).
 */
78 #ifdef NATINT_PACK
79 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
80 #else
81 # define NATINT_LEN(type,len) ((int)sizeof(type))
82 #endif
/* Overlays a float with a 32-bit integer and a 4-byte buffer. */
84 typedef union {
85 float f;
86 uint32_t u;
87 char buf[4];
88 } FLOAT_SWAPPER;
/* Overlays a double with a 64-bit integer and an 8-byte buffer. */
89 typedef union {
90 double d;
91 uint64_t u;
92 char buf[8];
93 } DOUBLE_SWAPPER;
/* Byte swaps of the integer views (swap32/swap64 are defined outside this view). */
94 #define swapf(x) swap32(x)
95 #define swapd(x) swap64(x)
/*
 * Conversions of the integer view between host order (h), network order
 * (n, big-endian) and VAX order (v, little-endian).  Each one swaps only
 * when BIGENDIAN_P() says the host order differs from the target order.
 */
97 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
98 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
99 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
100 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
101 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
102 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
103 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
104 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
/* FLOAT_CONVWITH declares a FLOAT_SWAPPER; the macros below convert it in place through its `u` member. */
106 #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
107 #define HTONF(x) ((x).u = rb_htonf((x).u))
108 #define HTOVF(x) ((x).u = rb_htovf((x).u))
109 #define NTOHF(x) ((x).u = rb_ntohf((x).u))
110 #define VTOHF(x) ((x).u = rb_vtohf((x).u))
/* DOUBLE_CONVWITH declares a DOUBLE_SWAPPER; the macros below convert it in place through its `u` member. */
112 #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
113 #define HTOND(x) ((x).u = rb_htond((x).u))
114 #define HTOVD(x) ((x).u = rb_htovd((x).u))
115 #define NTOHD(x) ((x).u = rb_ntohd((x).u))
116 #define VTOHD(x) ((x).u = rb_vtohd((x).u))
/* Largest integer width (bytes) pack_pack accepts; also sizes its scratch buffer. */
118 #define MAX_INTEGER_PACK_SIZE 8
/* Message of the ArgumentError raised when the array runs out of items. */
120 static const char toofew[] = "too few arguments";
/* Forward declarations; encodes() and qpencode() are defined further down. */
122 static void encodes(VALUE,const char*,long,int,int);
123 static void qpencode(VALUE,VALUE,long);
125 static unsigned long utf8_to_uv(const char*,long*);
/* Instance-variable ID under which str_associate() stores its array on a string. */
127 static ID id_associated;
129 static void
130 str_associate(VALUE str, VALUE add)
132 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
133 rb_ivar_set(str, id_associated, add);
136 static VALUE
137 str_associated(VALUE str)
139 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
140 if (!associates)
141 rb_raise(rb_eArgError, "no associated pointer");
142 return associates;
145 static VALUE
146 associated_pointer(VALUE associates, const char *t)
148 const VALUE *p = RARRAY_CONST_PTR(associates);
149 const VALUE *pend = p + RARRAY_LEN(associates);
150 for (; p < pend; p++) {
151 VALUE tmp = *p;
152 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
154 rb_raise(rb_eArgError, "non associated pointer");
155 UNREACHABLE_RETURN(Qnil);
158 RBIMPL_ATTR_NORETURN()
159 static void
160 unknown_directive(const char *mode, char type, VALUE fmt)
162 char unknown[5];
164 if (ISPRINT(type)) {
165 unknown[0] = type;
166 unknown[1] = '\0';
168 else {
169 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
171 fmt = rb_str_quote_unprintable(fmt);
172 rb_raise(rb_eArgError, "unknown %s directive '%s' in '%"PRIsVALUE"'",
173 mode, unknown, fmt);
176 static float
177 VALUE_to_float(VALUE obj)
179 VALUE v = rb_to_float(obj);
180 double d = RFLOAT_VALUE(v);
182 if (isnan(d)) {
183 return NAN;
185 else if (d < -FLT_MAX) {
186 return -INFINITY;
188 else if (d <= FLT_MAX) {
189 return d;
191 else {
192 return INFINITY;
196 static void
197 str_expand_fill(VALUE res, int c, long len)
199 long olen = RSTRING_LEN(res);
200 memset(RSTRING_PTR(res) + olen, c, len);
201 rb_str_set_len(res, olen + len);
/*
 * Returns the position just after the first '\n' in [p, pend), or +pend+
 * when the range holds no newline.
 */
static char *
skip_to_eol(const char *p, const char *pend)
{
    const char *newline = memchr(p, '\n', pend - p);

    if (newline == NULL) {
        return (char *)pend;
    }
    return (char *)(newline + 1);
}
/*
 * True when `type` is whitespace or starts a '#' comment; in the comment
 * case `p` is also advanced past the end of that line.  Expects a `pend`
 * variable to be in scope at the expansion site.
 */
211 #define skip_blank(p, type) \
212 (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1)))
/*
 * Without NATINT_PACK the `natint` argument is dropped from every use of
 * pack_modifiers(...), so callers need not declare a `natint` variable.
 */
214 #ifndef NATINT_PACK
215 # define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
216 #endif
217 static char *
218 pack_modifiers(const char *p, char type, int *natint, int *explicit_endian)
220 while (1) {
221 switch (*p) {
222 case '_':
223 case '!':
224 if (strchr(natstr, type)) {
225 #ifdef NATINT_PACK
226 *natint = 1;
227 #endif
228 p++;
230 else {
231 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
233 break;
235 case '<':
236 case '>':
237 if (!strchr(endstr, type)) {
238 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
240 if (*explicit_endian) {
241 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
243 *explicit_endian = *p++;
244 break;
245 default:
246 return (char *)p;
251 static VALUE
252 pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
254 const char *p, *pend;
255 VALUE res, from, associates = 0;
256 long len, idx, plen;
257 const char *ptr;
258 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
259 int integer_size, bigendian_p;
261 StringValue(fmt);
262 rb_must_asciicompat(fmt);
263 p = RSTRING_PTR(fmt);
264 pend = p + RSTRING_LEN(fmt);
266 if (NIL_P(buffer)) {
267 res = rb_str_buf_new(0);
269 else {
270 if (!RB_TYPE_P(buffer, T_STRING))
271 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
272 rb_str_modify(buffer);
273 res = buffer;
276 idx = 0;
278 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
279 #define MORE_ITEM (idx < RARRAY_LEN(ary))
280 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
281 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
283 while (p < pend) {
284 int explicit_endian = 0;
285 if (RSTRING_END(fmt) != pend) {
286 rb_raise(rb_eRuntimeError, "format string modified");
288 const char type = *p++; /* get data type */
289 #ifdef NATINT_PACK
290 int natint = 0; /* native integer */
291 #endif
293 if (skip_blank(p, type)) continue;
294 p = pack_modifiers(p, type, &natint, &explicit_endian);
296 if (*p == '*') { /* set data length */
297 len = strchr("@Xxu", type) ? 0
298 : strchr("PMm", type) ? 1
299 : RARRAY_LEN(ary) - idx;
300 p++;
302 else if (ISDIGIT(*p)) {
303 errno = 0;
304 len = STRTOUL(p, (char**)&p, 10);
305 if (errno) {
306 rb_raise(rb_eRangeError, "pack length too big");
309 else {
310 len = 1;
313 switch (type) {
314 case 'U':
315 /* if encoding is US-ASCII, upgrade to UTF-8 */
316 if (enc_info == 1) enc_info = 2;
317 break;
318 case 'm': case 'M': case 'u':
319 /* keep US-ASCII (do nothing) */
320 break;
321 default:
322 /* fall back to BINARY */
323 enc_info = 0;
324 break;
326 switch (type) {
327 case 'A': case 'a': case 'Z':
328 case 'B': case 'b':
329 case 'H': case 'h':
330 from = NEXTFROM;
331 if (NIL_P(from)) {
332 ptr = "";
333 plen = 0;
335 else {
336 StringValue(from);
337 ptr = RSTRING_PTR(from);
338 plen = RSTRING_LEN(from);
341 if (p[-1] == '*')
342 len = plen;
344 switch (type) {
345 case 'a': /* arbitrary binary string (null padded) */
346 case 'A': /* arbitrary binary string (ASCII space padded) */
347 case 'Z': /* null terminated string */
348 if (plen >= len) {
349 rb_str_buf_cat(res, ptr, len);
350 if (p[-1] == '*' && type == 'Z')
351 rb_str_buf_cat(res, "", 1);
353 else {
354 rb_str_modify_expand(res, len);
355 rb_str_buf_cat(res, ptr, plen);
356 str_expand_fill(res, (type == 'A' ? ' ' : '\0'), len - plen);
358 break;
360 #define castchar(from) (char)((from) & 0xff)
362 case 'b': /* bit string (ascending) */
364 int byte = 0;
365 long i, j = 0;
367 if (len > plen) {
368 j = (len - plen + 1)/2;
369 len = plen;
371 for (i=0; i++ < len; ptr++) {
372 if (*ptr & 1)
373 byte |= 128;
374 if (i & 7)
375 byte >>= 1;
376 else {
377 char c = castchar(byte);
378 rb_str_buf_cat(res, &c, 1);
379 byte = 0;
382 if (len & 7) {
383 char c;
384 byte >>= 7 - (len & 7);
385 c = castchar(byte);
386 rb_str_buf_cat(res, &c, 1);
388 len = j;
389 goto grow;
391 break;
393 case 'B': /* bit string (descending) */
395 int byte = 0;
396 long i, j = 0;
398 if (len > plen) {
399 j = (len - plen + 1)/2;
400 len = plen;
402 for (i=0; i++ < len; ptr++) {
403 byte |= *ptr & 1;
404 if (i & 7)
405 byte <<= 1;
406 else {
407 char c = castchar(byte);
408 rb_str_buf_cat(res, &c, 1);
409 byte = 0;
412 if (len & 7) {
413 char c;
414 byte <<= 7 - (len & 7);
415 c = castchar(byte);
416 rb_str_buf_cat(res, &c, 1);
418 len = j;
419 goto grow;
421 break;
423 case 'h': /* hex string (low nibble first) */
425 int byte = 0;
426 long i, j = 0;
428 if (len > plen) {
429 j = (len + 1) / 2 - (plen + 1) / 2;
430 len = plen;
432 for (i=0; i++ < len; ptr++) {
433 if (ISALPHA(*ptr))
434 byte |= (((*ptr & 15) + 9) & 15) << 4;
435 else
436 byte |= (*ptr & 15) << 4;
437 if (i & 1)
438 byte >>= 4;
439 else {
440 char c = castchar(byte);
441 rb_str_buf_cat(res, &c, 1);
442 byte = 0;
445 if (len & 1) {
446 char c = castchar(byte);
447 rb_str_buf_cat(res, &c, 1);
449 len = j;
450 goto grow;
452 break;
454 case 'H': /* hex string (high nibble first) */
456 int byte = 0;
457 long i, j = 0;
459 if (len > plen) {
460 j = (len + 1) / 2 - (plen + 1) / 2;
461 len = plen;
463 for (i=0; i++ < len; ptr++) {
464 if (ISALPHA(*ptr))
465 byte |= ((*ptr & 15) + 9) & 15;
466 else
467 byte |= *ptr & 15;
468 if (i & 1)
469 byte <<= 4;
470 else {
471 char c = castchar(byte);
472 rb_str_buf_cat(res, &c, 1);
473 byte = 0;
476 if (len & 1) {
477 char c = castchar(byte);
478 rb_str_buf_cat(res, &c, 1);
480 len = j;
481 goto grow;
483 break;
485 break;
487 case 'c': /* signed char */
488 case 'C': /* unsigned char */
489 integer_size = 1;
490 bigendian_p = BIGENDIAN_P(); /* not effective */
491 goto pack_integer;
493 case 's': /* s for int16_t, s! for signed short */
494 case 'S': /* S for uint16_t, S! for unsigned short */
495 integer_size = NATINT_LEN(short, 2);
496 bigendian_p = BIGENDIAN_P();
497 goto pack_integer;
499 case 'i': /* i and i! for signed int */
500 case 'I': /* I and I! for unsigned int */
501 integer_size = (int)sizeof(int);
502 bigendian_p = BIGENDIAN_P();
503 goto pack_integer;
505 case 'l': /* l for int32_t, l! for signed long */
506 case 'L': /* L for uint32_t, L! for unsigned long */
507 integer_size = NATINT_LEN(long, 4);
508 bigendian_p = BIGENDIAN_P();
509 goto pack_integer;
511 case 'q': /* q for int64_t, q! for signed long long */
512 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
513 integer_size = NATINT_LEN_Q;
514 bigendian_p = BIGENDIAN_P();
515 goto pack_integer;
517 case 'j': /* j for intptr_t */
518 integer_size = sizeof(intptr_t);
519 bigendian_p = BIGENDIAN_P();
520 goto pack_integer;
522 case 'J': /* J for uintptr_t */
523 integer_size = sizeof(uintptr_t);
524 bigendian_p = BIGENDIAN_P();
525 goto pack_integer;
527 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
528 integer_size = 2;
529 bigendian_p = 1;
530 goto pack_integer;
532 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
533 integer_size = 4;
534 bigendian_p = 1;
535 goto pack_integer;
537 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
538 integer_size = 2;
539 bigendian_p = 0;
540 goto pack_integer;
542 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
543 integer_size = 4;
544 bigendian_p = 0;
545 goto pack_integer;
547 pack_integer:
548 if (explicit_endian) {
549 bigendian_p = explicit_endian == '>';
551 if (integer_size > MAX_INTEGER_PACK_SIZE)
552 rb_bug("unexpected integer size for pack: %d", integer_size);
553 while (len-- > 0) {
554 char intbuf[MAX_INTEGER_PACK_SIZE];
556 from = NEXTFROM;
557 rb_integer_pack(from, intbuf, integer_size, 1, 0,
558 INTEGER_PACK_2COMP |
559 (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
560 rb_str_buf_cat(res, intbuf, integer_size);
562 break;
564 case 'f': /* single precision float in native format */
565 case 'F': /* ditto */
566 while (len-- > 0) {
567 float f;
569 from = NEXTFROM;
570 f = VALUE_to_float(from);
571 rb_str_buf_cat(res, (char*)&f, sizeof(float));
573 break;
575 case 'e': /* single precision float in VAX byte-order */
576 while (len-- > 0) {
577 FLOAT_CONVWITH(tmp);
579 from = NEXTFROM;
580 tmp.f = VALUE_to_float(from);
581 HTOVF(tmp);
582 rb_str_buf_cat(res, tmp.buf, sizeof(float));
584 break;
586 case 'E': /* double precision float in VAX byte-order */
587 while (len-- > 0) {
588 DOUBLE_CONVWITH(tmp);
589 from = NEXTFROM;
590 tmp.d = RFLOAT_VALUE(rb_to_float(from));
591 HTOVD(tmp);
592 rb_str_buf_cat(res, tmp.buf, sizeof(double));
594 break;
596 case 'd': /* double precision float in native format */
597 case 'D': /* ditto */
598 while (len-- > 0) {
599 double d;
601 from = NEXTFROM;
602 d = RFLOAT_VALUE(rb_to_float(from));
603 rb_str_buf_cat(res, (char*)&d, sizeof(double));
605 break;
607 case 'g': /* single precision float in network byte-order */
608 while (len-- > 0) {
609 FLOAT_CONVWITH(tmp);
610 from = NEXTFROM;
611 tmp.f = VALUE_to_float(from);
612 HTONF(tmp);
613 rb_str_buf_cat(res, tmp.buf, sizeof(float));
615 break;
617 case 'G': /* double precision float in network byte-order */
618 while (len-- > 0) {
619 DOUBLE_CONVWITH(tmp);
621 from = NEXTFROM;
622 tmp.d = RFLOAT_VALUE(rb_to_float(from));
623 HTOND(tmp);
624 rb_str_buf_cat(res, tmp.buf, sizeof(double));
626 break;
628 case 'x': /* null byte */
629 grow:
630 rb_str_modify_expand(res, len);
631 str_expand_fill(res, '\0', len);
632 break;
634 case 'X': /* back up byte */
635 shrink:
636 plen = RSTRING_LEN(res);
637 if (plen < len)
638 rb_raise(rb_eArgError, "X outside of string");
639 rb_str_set_len(res, plen - len);
640 break;
642 case '@': /* null fill to absolute position */
643 len -= RSTRING_LEN(res);
644 if (len > 0) goto grow;
645 len = -len;
646 if (len > 0) goto shrink;
647 break;
649 case '%':
650 rb_raise(rb_eArgError, "%% is not supported");
651 break;
653 case 'U': /* Unicode character */
654 while (len-- > 0) {
655 SIGNED_VALUE l;
656 char buf[8];
657 int le;
659 from = NEXTFROM;
660 from = rb_to_int(from);
661 l = NUM2LONG(from);
662 if (l < 0) {
663 rb_raise(rb_eRangeError, "pack(U): value out of range");
665 le = rb_uv_to_utf8(buf, l);
666 rb_str_buf_cat(res, (char*)buf, le);
668 break;
670 case 'u': /* uuencoded string */
671 case 'm': /* base64 encoded string */
672 from = NEXTFROM;
673 StringValue(from);
674 ptr = RSTRING_PTR(from);
675 plen = RSTRING_LEN(from);
677 if (len == 0 && type == 'm') {
678 encodes(res, ptr, plen, type, 0);
679 ptr += plen;
680 break;
682 if (len <= 2)
683 len = 45;
684 else if (len > 63 && type == 'u')
685 len = 63;
686 else
687 len = len / 3 * 3;
688 while (plen > 0) {
689 long todo;
691 if (plen > len)
692 todo = len;
693 else
694 todo = plen;
695 encodes(res, ptr, todo, type, 1);
696 plen -= todo;
697 ptr += todo;
699 break;
701 case 'M': /* quoted-printable encoded string */
702 from = rb_obj_as_string(NEXTFROM);
703 if (len <= 1)
704 len = 72;
705 qpencode(res, from, len);
706 break;
708 case 'P': /* pointer to packed byte string */
709 from = THISFROM;
710 if (!NIL_P(from)) {
711 StringValue(from);
712 if (RSTRING_LEN(from) < len) {
713 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
714 RSTRING_LEN(from), len);
717 len = 1;
718 /* FALL THROUGH */
719 case 'p': /* pointer to string */
720 while (len-- > 0) {
721 char *t;
722 from = NEXTFROM;
723 if (NIL_P(from)) {
724 t = 0;
726 else {
727 t = StringValuePtr(from);
729 if (!associates) {
730 associates = rb_ary_new();
732 rb_ary_push(associates, from);
733 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
735 break;
737 case 'w': /* BER compressed integer */
738 while (len-- > 0) {
739 VALUE buf = rb_str_new(0, 0);
740 size_t numbytes;
741 int sign;
742 char *cp;
744 from = NEXTFROM;
745 from = rb_to_int(from);
746 numbytes = rb_absint_numwords(from, 7, NULL);
747 if (numbytes == 0)
748 numbytes = 1;
749 buf = rb_str_new(NULL, numbytes);
751 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
753 if (sign < 0)
754 rb_raise(rb_eArgError, "can't compress negative numbers");
755 if (sign == 2)
756 rb_bug("buffer size problem?");
758 cp = RSTRING_PTR(buf);
759 while (1 < numbytes) {
760 *cp |= 0x80;
761 cp++;
762 numbytes--;
765 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
767 break;
769 default: {
770 unknown_directive("pack", type, fmt);
771 break;
776 if (associates) {
777 str_associate(res, associates);
779 switch (enc_info) {
780 case 1:
781 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
782 break;
783 case 2:
784 rb_enc_set_index(res, rb_utf8_encindex());
785 break;
786 default:
787 /* do nothing, keep ASCII-8BIT */
788 break;
790 return res;
793 VALUE
794 rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
796 return pack_pack(ec, ary, fmt, buffer);
/* Output alphabet for the 'u' directive, indexed by 6-bit value (0 maps to '`'). */
799 static const char uu_table[] =
800 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
/* Output alphabet for the 'm' (base64) directive, indexed by 6-bit value. */
801 static const char b64_table[] =
802 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
804 static void
805 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
807 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
808 char buff[buff_size + 1]; /* +1 for tail_lf */
809 long i = 0;
810 const char *const trans = type == 'u' ? uu_table : b64_table;
811 char padding;
812 const unsigned char *s = (const unsigned char *)s0;
814 if (type == 'u') {
815 buff[i++] = (char)len + ' ';
816 padding = '`';
818 else {
819 padding = '=';
821 while (len >= input_unit) {
822 while (len >= input_unit && buff_size-i >= encoded_unit) {
823 buff[i++] = trans[077 & (*s >> 2)];
824 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
825 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
826 buff[i++] = trans[077 & s[2]];
827 s += input_unit;
828 len -= input_unit;
830 if (buff_size-i < encoded_unit) {
831 rb_str_buf_cat(str, buff, i);
832 i = 0;
836 if (len == 2) {
837 buff[i++] = trans[077 & (*s >> 2)];
838 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
839 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
840 buff[i++] = padding;
842 else if (len == 1) {
843 buff[i++] = trans[077 & (*s >> 2)];
844 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
845 buff[i++] = padding;
846 buff[i++] = padding;
848 if (tail_lf) buff[i++] = '\n';
849 rb_str_buf_cat(str, buff, i);
850 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
/* Upper-case hex digits, indexed by nibble; used by qpencode() for "=XX" escapes. */
853 static const char hex_table[] = "0123456789ABCDEF";
855 static void
856 qpencode(VALUE str, VALUE from, long len)
858 char buff[1024];
859 long i = 0, n = 0, prev = EOF;
860 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
861 unsigned char *send = s + RSTRING_LEN(from);
863 while (s < send) {
864 if ((*s > 126) ||
865 (*s < 32 && *s != '\n' && *s != '\t') ||
866 (*s == '=')) {
867 buff[i++] = '=';
868 buff[i++] = hex_table[*s >> 4];
869 buff[i++] = hex_table[*s & 0x0f];
870 n += 3;
871 prev = EOF;
873 else if (*s == '\n') {
874 if (prev == ' ' || prev == '\t') {
875 buff[i++] = '=';
876 buff[i++] = *s;
878 buff[i++] = *s;
879 n = 0;
880 prev = *s;
882 else {
883 buff[i++] = *s;
884 n++;
885 prev = *s;
887 if (n > len) {
888 buff[i++] = '=';
889 buff[i++] = '\n';
890 n = 0;
891 prev = '\n';
893 if (i > 1024 - 5) {
894 rb_str_buf_cat(str, buff, i);
895 i = 0;
897 s++;
899 if (n > 0) {
900 buff[i++] = '=';
901 buff[i++] = '\n';
903 if (i > 0) {
904 rb_str_buf_cat(str, buff, i);
908 static inline int
909 hex2num(char c)
911 int n;
912 n = ruby_digit36_to_number_table[(unsigned char)c];
913 if (16 <= n)
914 n = -1;
915 return n;
/*
 * Clamps `len` to the number of whole `sz`-byte items left between `s` and
 * `send`.  Unless the count came from '*' (`star`), the number of items
 * that could not be supplied is left in `tmp_len`; otherwise `tmp_len` is 0.
 * Uses `s`, `send`, `len`, `star` and `tmp_len` from the expansion site.
 */
918 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
919 tmp_len = 0; \
920 if (len > (long)((send-s)/(sz))) { \
921 if (!star) { \
922 tmp_len = len-(send-s)/(sz); \
924 len = (send-s)/(sz); \
926 } while (0)
/*
 * In UNPACK_ARRAY mode, when `tmp_len` items were missing, stores nil at
 * the index `tmp_len` places past the current last element of `ary`.
 * NOTE(review): presumably rb_ary_store nil-fills the gap so the result has
 * one slot per requested item - confirm.
 */
928 #define PACK_ITEM_ADJUST() do { \
929 if (tmp_len > 0 && mode == UNPACK_ARRAY) \
930 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
931 } while (0)
933 /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
934 * 12.4/12.5/12.6 C compiler optimization bug
935 * with "-xO4" optimization option.
/* AVOID_CC_BUG expands to `volatile` only for the affected __SUNPRO_C range, and to nothing elsewhere. */
937 #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
938 # define AVOID_CC_BUG volatile
939 #else
940 # define AVOID_CC_BUG
941 #endif
/*
 * How pack_unpack_internal delivers each unpacked item (see UNPACK_PUSH):
 * UNPACK_ARRAY pushes it onto the result array, UNPACK_BLOCK yields it,
 * UNPACK_1 returns the first item immediately.
 */
943 enum unpack_mode {
944 UNPACK_ARRAY,
945 UNPACK_BLOCK,
946 UNPACK_1
949 static VALUE
950 pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
952 #define hexdigits ruby_hexdigits
953 char *s, *send;
954 char *p, *pend;
955 VALUE ary, associates = Qfalse;
956 long len;
957 AVOID_CC_BUG long tmp_len;
958 int signed_p, integer_size, bigendian_p;
959 #define UNPACK_PUSH(item) do {\
960 VALUE item_val = (item);\
961 if ((mode) == UNPACK_BLOCK) {\
962 rb_yield(item_val);\
964 else if ((mode) == UNPACK_ARRAY) {\
965 rb_ary_push(ary, item_val);\
967 else /* if ((mode) == UNPACK_1) { */ {\
968 return item_val; \
970 } while (0)
972 StringValue(str);
973 StringValue(fmt);
974 rb_must_asciicompat(fmt);
976 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
977 len = RSTRING_LEN(str);
978 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
980 s = RSTRING_PTR(str);
981 send = s + len;
982 s += offset;
984 p = RSTRING_PTR(fmt);
985 pend = p + RSTRING_LEN(fmt);
987 #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
989 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
990 while (p < pend) {
991 int explicit_endian = 0;
992 const char type = *p++;
993 #ifdef NATINT_PACK
994 int natint = 0; /* native integer */
995 #endif
996 int star = 0;
998 if (skip_blank(p, type)) continue;
999 p = pack_modifiers(p, type, &natint, &explicit_endian);
1001 if (p >= pend)
1002 len = 1;
1003 else if (*p == '*') {
1004 star = 1;
1005 len = send - s;
1006 p++;
1008 else if (ISDIGIT(*p)) {
1009 errno = 0;
1010 len = STRTOUL(p, (char**)&p, 10);
1011 if (len < 0 || errno) {
1012 rb_raise(rb_eRangeError, "pack length too big");
1015 else {
1016 len = (type != '@');
1019 switch (type) {
1020 case '%':
1021 rb_raise(rb_eArgError, "%% is not supported");
1022 break;
1024 case 'A':
1025 if (len > send - s) len = send - s;
1027 long end = len;
1028 char *t = s + len - 1;
1030 while (t >= s) {
1031 if (*t != ' ' && *t != '\0') break;
1032 t--; len--;
1034 UNPACK_PUSH(rb_str_new(s, len));
1035 s += end;
1037 break;
1039 case 'Z':
1041 char *t = s;
1043 if (len > send-s) len = send-s;
1044 while (t < s+len && *t) t++;
1045 UNPACK_PUSH(rb_str_new(s, t-s));
1046 if (t < send) t++;
1047 s = star ? t : s+len;
1049 break;
1051 case 'a':
1052 if (len > send - s) len = send - s;
1053 UNPACK_PUSH(rb_str_new(s, len));
1054 s += len;
1055 break;
1057 case 'b':
1059 VALUE bitstr;
1060 char *t;
1061 int bits;
1062 long i;
1064 if (p[-1] == '*' || len > (send - s) * 8)
1065 len = (send - s) * 8;
1066 bits = 0;
1067 bitstr = rb_usascii_str_new(0, len);
1068 t = RSTRING_PTR(bitstr);
1069 for (i=0; i<len; i++) {
1070 if (i & 7) bits >>= 1;
1071 else bits = (unsigned char)*s++;
1072 *t++ = (bits & 1) ? '1' : '0';
1074 UNPACK_PUSH(bitstr);
1076 break;
1078 case 'B':
1080 VALUE bitstr;
1081 char *t;
1082 int bits;
1083 long i;
1085 if (p[-1] == '*' || len > (send - s) * 8)
1086 len = (send - s) * 8;
1087 bits = 0;
1088 bitstr = rb_usascii_str_new(0, len);
1089 t = RSTRING_PTR(bitstr);
1090 for (i=0; i<len; i++) {
1091 if (i & 7) bits <<= 1;
1092 else bits = (unsigned char)*s++;
1093 *t++ = (bits & 128) ? '1' : '0';
1095 UNPACK_PUSH(bitstr);
1097 break;
1099 case 'h':
1101 VALUE bitstr;
1102 char *t;
1103 int bits;
1104 long i;
1106 if (p[-1] == '*' || len > (send - s) * 2)
1107 len = (send - s) * 2;
1108 bits = 0;
1109 bitstr = rb_usascii_str_new(0, len);
1110 t = RSTRING_PTR(bitstr);
1111 for (i=0; i<len; i++) {
1112 if (i & 1)
1113 bits >>= 4;
1114 else
1115 bits = (unsigned char)*s++;
1116 *t++ = hexdigits[bits & 15];
1118 UNPACK_PUSH(bitstr);
1120 break;
1122 case 'H':
1124 VALUE bitstr;
1125 char *t;
1126 int bits;
1127 long i;
1129 if (p[-1] == '*' || len > (send - s) * 2)
1130 len = (send - s) * 2;
1131 bits = 0;
1132 bitstr = rb_usascii_str_new(0, len);
1133 t = RSTRING_PTR(bitstr);
1134 for (i=0; i<len; i++) {
1135 if (i & 1)
1136 bits <<= 4;
1137 else
1138 bits = (unsigned char)*s++;
1139 *t++ = hexdigits[(bits >> 4) & 15];
1141 UNPACK_PUSH(bitstr);
1143 break;
1145 case 'c':
1146 signed_p = 1;
1147 integer_size = 1;
1148 bigendian_p = BIGENDIAN_P(); /* not effective */
1149 goto unpack_integer;
1151 case 'C':
1152 signed_p = 0;
1153 integer_size = 1;
1154 bigendian_p = BIGENDIAN_P(); /* not effective */
1155 goto unpack_integer;
1157 case 's':
1158 signed_p = 1;
1159 integer_size = NATINT_LEN(short, 2);
1160 bigendian_p = BIGENDIAN_P();
1161 goto unpack_integer;
1163 case 'S':
1164 signed_p = 0;
1165 integer_size = NATINT_LEN(short, 2);
1166 bigendian_p = BIGENDIAN_P();
1167 goto unpack_integer;
1169 case 'i':
1170 signed_p = 1;
1171 integer_size = (int)sizeof(int);
1172 bigendian_p = BIGENDIAN_P();
1173 goto unpack_integer;
1175 case 'I':
1176 signed_p = 0;
1177 integer_size = (int)sizeof(int);
1178 bigendian_p = BIGENDIAN_P();
1179 goto unpack_integer;
1181 case 'l':
1182 signed_p = 1;
1183 integer_size = NATINT_LEN(long, 4);
1184 bigendian_p = BIGENDIAN_P();
1185 goto unpack_integer;
1187 case 'L':
1188 signed_p = 0;
1189 integer_size = NATINT_LEN(long, 4);
1190 bigendian_p = BIGENDIAN_P();
1191 goto unpack_integer;
1193 case 'q':
1194 signed_p = 1;
1195 integer_size = NATINT_LEN_Q;
1196 bigendian_p = BIGENDIAN_P();
1197 goto unpack_integer;
1199 case 'Q':
1200 signed_p = 0;
1201 integer_size = NATINT_LEN_Q;
1202 bigendian_p = BIGENDIAN_P();
1203 goto unpack_integer;
1205 case 'j':
1206 signed_p = 1;
1207 integer_size = sizeof(intptr_t);
1208 bigendian_p = BIGENDIAN_P();
1209 goto unpack_integer;
1211 case 'J':
1212 signed_p = 0;
1213 integer_size = sizeof(uintptr_t);
1214 bigendian_p = BIGENDIAN_P();
1215 goto unpack_integer;
1217 case 'n':
1218 signed_p = 0;
1219 integer_size = 2;
1220 bigendian_p = 1;
1221 goto unpack_integer;
1223 case 'N':
1224 signed_p = 0;
1225 integer_size = 4;
1226 bigendian_p = 1;
1227 goto unpack_integer;
1229 case 'v':
1230 signed_p = 0;
1231 integer_size = 2;
1232 bigendian_p = 0;
1233 goto unpack_integer;
1235 case 'V':
1236 signed_p = 0;
1237 integer_size = 4;
1238 bigendian_p = 0;
1239 goto unpack_integer;
1241 unpack_integer:
1242 if (explicit_endian) {
1243 bigendian_p = explicit_endian == '>';
1245 PACK_LENGTH_ADJUST_SIZE(integer_size);
1246 while (len-- > 0) {
1247 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1248 VALUE val;
1249 if (signed_p)
1250 flags |= INTEGER_PACK_2COMP;
1251 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1252 UNPACK_PUSH(val);
1253 s += integer_size;
1255 PACK_ITEM_ADJUST();
1256 break;
1258 case 'f':
1259 case 'F':
1260 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1261 while (len-- > 0) {
1262 float tmp;
1263 UNPACK_FETCH(&tmp, float);
1264 UNPACK_PUSH(DBL2NUM((double)tmp));
1266 PACK_ITEM_ADJUST();
1267 break;
1269 case 'e':
1270 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1271 while (len-- > 0) {
1272 FLOAT_CONVWITH(tmp);
1273 UNPACK_FETCH(tmp.buf, float);
1274 VTOHF(tmp);
1275 UNPACK_PUSH(DBL2NUM(tmp.f));
1277 PACK_ITEM_ADJUST();
1278 break;
1280 case 'E':
1281 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1282 while (len-- > 0) {
1283 DOUBLE_CONVWITH(tmp);
1284 UNPACK_FETCH(tmp.buf, double);
1285 VTOHD(tmp);
1286 UNPACK_PUSH(DBL2NUM(tmp.d));
1288 PACK_ITEM_ADJUST();
1289 break;
1291 case 'D':
1292 case 'd':
1293 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1294 while (len-- > 0) {
1295 double tmp;
1296 UNPACK_FETCH(&tmp, double);
1297 UNPACK_PUSH(DBL2NUM(tmp));
1299 PACK_ITEM_ADJUST();
1300 break;
1302 case 'g':
1303 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1304 while (len-- > 0) {
1305 FLOAT_CONVWITH(tmp);
1306 UNPACK_FETCH(tmp.buf, float);
1307 NTOHF(tmp);
1308 UNPACK_PUSH(DBL2NUM(tmp.f));
1310 PACK_ITEM_ADJUST();
1311 break;
1313 case 'G':
1314 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1315 while (len-- > 0) {
1316 DOUBLE_CONVWITH(tmp);
1317 UNPACK_FETCH(tmp.buf, double);
1318 NTOHD(tmp);
1319 UNPACK_PUSH(DBL2NUM(tmp.d));
1321 PACK_ITEM_ADJUST();
1322 break;
1324 case 'U':
1325 if (len > send - s) len = send - s;
1326 while (len > 0 && s < send) {
1327 long alen = send - s;
1328 unsigned long l;
1330 l = utf8_to_uv(s, &alen);
1331 s += alen; len--;
1332 UNPACK_PUSH(ULONG2NUM(l));
1334 break;
1336 case 'u':
1338 VALUE buf = rb_str_new(0, (send - s)*3/4);
1339 char *ptr = RSTRING_PTR(buf);
1340 long total = 0;
1342 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1343 long a,b,c,d;
1344 char hunk[3];
1346 len = ((unsigned char)*s++ - ' ') & 077;
1348 total += len;
1349 if (total > RSTRING_LEN(buf)) {
1350 len -= total - RSTRING_LEN(buf);
1351 total = RSTRING_LEN(buf);
1354 while (len > 0) {
1355 long mlen = len > 3 ? 3 : len;
1357 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1358 a = ((unsigned char)*s++ - ' ') & 077;
1359 else
1360 a = 0;
1361 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1362 b = ((unsigned char)*s++ - ' ') & 077;
1363 else
1364 b = 0;
1365 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1366 c = ((unsigned char)*s++ - ' ') & 077;
1367 else
1368 c = 0;
1369 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1370 d = ((unsigned char)*s++ - ' ') & 077;
1371 else
1372 d = 0;
1373 hunk[0] = (char)(a << 2 | b >> 4);
1374 hunk[1] = (char)(b << 4 | c >> 2);
1375 hunk[2] = (char)(c << 6 | d);
1376 memcpy(ptr, hunk, mlen);
1377 ptr += mlen;
1378 len -= mlen;
1380 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1381 s++; /* possible checksum byte */
1382 if (s < send && *s == '\r') s++;
1383 if (s < send && *s == '\n') s++;
1386 rb_str_set_len(buf, total);
1387 UNPACK_PUSH(buf);
1389 break;
1391 case 'm':
1393 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1394 char *ptr = RSTRING_PTR(buf);
1395 int a = -1,b = -1,c = 0,d = 0;
1396 static signed char b64_xtable[256];
1398 if (b64_xtable['/'] <= 0) {
1399 int i;
1401 for (i = 0; i < 256; i++) {
1402 b64_xtable[i] = -1;
1404 for (i = 0; i < 64; i++) {
1405 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1408 if (len == 0) {
1409 while (s < send) {
1410 a = b = c = d = -1;
1411 a = b64_xtable[(unsigned char)*s++];
1412 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1413 b = b64_xtable[(unsigned char)*s++];
1414 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1415 if (*s == '=') {
1416 if (s + 2 == send && *(s + 1) == '=') break;
1417 rb_raise(rb_eArgError, "invalid base64");
1419 c = b64_xtable[(unsigned char)*s++];
1420 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1421 if (s + 1 == send && *s == '=') break;
1422 d = b64_xtable[(unsigned char)*s++];
1423 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1424 *ptr++ = castchar(a << 2 | b >> 4);
1425 *ptr++ = castchar(b << 4 | c >> 2);
1426 *ptr++ = castchar(c << 6 | d);
1428 if (c == -1) {
1429 *ptr++ = castchar(a << 2 | b >> 4);
1430 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1432 else if (d == -1) {
1433 *ptr++ = castchar(a << 2 | b >> 4);
1434 *ptr++ = castchar(b << 4 | c >> 2);
1435 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1438 else {
1439 while (s < send) {
1440 a = b = c = d = -1;
1441 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1442 if (s >= send) break;
1443 s++;
1444 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1445 if (s >= send) break;
1446 s++;
1447 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1448 if (*s == '=' || s >= send) break;
1449 s++;
1450 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1451 if (*s == '=' || s >= send) break;
1452 s++;
1453 *ptr++ = castchar(a << 2 | b >> 4);
1454 *ptr++ = castchar(b << 4 | c >> 2);
1455 *ptr++ = castchar(c << 6 | d);
1456 a = -1;
1458 if (a != -1 && b != -1) {
1459 if (c == -1)
1460 *ptr++ = castchar(a << 2 | b >> 4);
1461 else {
1462 *ptr++ = castchar(a << 2 | b >> 4);
1463 *ptr++ = castchar(b << 4 | c >> 2);
1467 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1468 UNPACK_PUSH(buf);
1470 break;
1472 case 'M':
1474 VALUE buf = rb_str_new(0, send - s);
1475 char *ptr = RSTRING_PTR(buf), *ss = s;
1476 int csum = 0;
1477 int c1, c2;
1479 while (s < send) {
1480 if (*s == '=') {
1481 if (++s == send) break;
1482 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1483 s++;
1484 if (*s != '\n') {
1485 if ((c1 = hex2num(*s)) == -1) break;
1486 if (++s == send) break;
1487 if ((c2 = hex2num(*s)) == -1) break;
1488 csum |= *ptr++ = castchar(c1 << 4 | c2);
1491 else {
1492 csum |= *ptr++ = *s;
1494 s++;
1495 ss = s;
1497 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1498 rb_str_buf_cat(buf, ss, send-ss);
1499 csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
1500 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1501 UNPACK_PUSH(buf);
1503 break;
1505 case '@':
1506 if (len > RSTRING_LEN(str))
1507 rb_raise(rb_eArgError, "@ outside of string");
1508 s = RSTRING_PTR(str) + len;
1509 break;
1511 case 'X':
1512 if (len > s - RSTRING_PTR(str))
1513 rb_raise(rb_eArgError, "X outside of string");
1514 s -= len;
1515 break;
1517 case 'x':
1518 if (len > send - s)
1519 rb_raise(rb_eArgError, "x outside of string");
1520 s += len;
1521 break;
1523 case 'P':
1524 if (sizeof(char *) <= (size_t)(send - s)) {
1525 VALUE tmp = Qnil;
1526 char *t;
1528 UNPACK_FETCH(&t, char *);
1529 if (t) {
1530 if (!associates) associates = str_associated(str);
1531 tmp = associated_pointer(associates, t);
1532 if (len < RSTRING_LEN(tmp)) {
1533 tmp = rb_str_new(t, len);
1534 str_associate(tmp, associates);
1537 UNPACK_PUSH(tmp);
1539 break;
1541 case 'p':
1542 if (len > (long)((send - s) / sizeof(char *)))
1543 len = (send - s) / sizeof(char *);
1544 while (len-- > 0) {
1545 if ((size_t)(send - s) < sizeof(char *))
1546 break;
1547 else {
1548 VALUE tmp = Qnil;
1549 char *t;
1551 UNPACK_FETCH(&t, char *);
1552 if (t) {
1553 if (!associates) associates = str_associated(str);
1554 tmp = associated_pointer(associates, t);
1556 UNPACK_PUSH(tmp);
1559 break;
1561 case 'w':
1563 char *s0 = s;
1564 while (len > 0 && s < send) {
1565 if (*s & 0x80) {
1566 s++;
1568 else {
1569 s++;
1570 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1571 len--;
1572 s0 = s;
1576 break;
1578 default:
1579 unknown_directive("unpack", type, fmt);
1580 break;
1584 return ary;
1587 static VALUE
1588 pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1590 enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1591 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1594 static VALUE
1595 pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1597 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1601 rb_uv_to_utf8(char buf[6], unsigned long uv)
1603 if (uv <= 0x7f) {
1604 buf[0] = (char)uv;
1605 return 1;
1607 if (uv <= 0x7ff) {
1608 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1609 buf[1] = castchar((uv&0x3f)|0x80);
1610 return 2;
1612 if (uv <= 0xffff) {
1613 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1614 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1615 buf[2] = castchar((uv&0x3f)|0x80);
1616 return 3;
1618 if (uv <= 0x1fffff) {
1619 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1620 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1621 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1622 buf[3] = castchar((uv&0x3f)|0x80);
1623 return 4;
1625 if (uv <= 0x3ffffff) {
1626 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1627 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1628 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1629 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1630 buf[4] = castchar((uv&0x3f)|0x80);
1631 return 5;
1633 if (uv <= 0x7fffffff) {
1634 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1635 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1636 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1637 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1638 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1639 buf[5] = castchar((uv&0x3f)|0x80);
1640 return 6;
1642 rb_raise(rb_eRangeError, "pack(U): value out of range");
1644 UNREACHABLE_RETURN(Qnil);
/*
 * utf8_limits[len - 1] is the smallest value that needs a sequence of +len+
 * bytes.  utf8_to_uv() rejects a decoded value below the limit for its
 * sequence length as a redundant (overlong) encoding.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte:  0x0        */
    1UL << 7,   /* 2 bytes: 0x80       */
    1UL << 11,  /* 3 bytes: 0x800      */
    1UL << 16,  /* 4 bytes: 0x10000    */
    1UL << 21,  /* 5 bytes: 0x200000   */
    1UL << 26,  /* 6 bytes: 0x4000000  */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1657 static unsigned long
1658 utf8_to_uv(const char *p, long *lenp)
1660 int c = *p++ & 0xff;
1661 unsigned long uv = c;
1662 long n;
1664 if (!(uv & 0x80)) {
1665 *lenp = 1;
1666 return uv;
1668 if (!(uv & 0x40)) {
1669 *lenp = 1;
1670 rb_raise(rb_eArgError, "malformed UTF-8 character");
1673 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1674 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1675 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1676 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1677 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1678 else {
1679 *lenp = 1;
1680 rb_raise(rb_eArgError, "malformed UTF-8 character");
1682 if (n > *lenp) {
1683 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1684 n, *lenp);
1686 *lenp = n--;
1687 if (n != 0) {
1688 while (n--) {
1689 c = *p++ & 0xff;
1690 if ((c & 0xc0) != 0x80) {
1691 *lenp -= n + 1;
1692 rb_raise(rb_eArgError, "malformed UTF-8 character");
1694 else {
1695 c &= 0x3f;
1696 uv = uv << 6 | c;
1700 n = *lenp - 1;
1701 if (uv < utf8_limits[n]) {
1702 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1704 return uv;
1707 #include "pack.rbinc"
1709 void
1710 Init_pack(void)
1712 id_associated = rb_make_internal_id();