1 /**********************************************************************
6 created at: Thu Feb 10 15:17:05 JST 1994
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
17 #include <sys/types.h>
20 #include "internal/array.h"
21 #include "internal/bits.h"
22 #include "internal/string.h"
23 #include "internal/symbol.h"
24 #include "internal/variable.h"
25 #include "ruby/util.h"
30 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
31 * instead of HAVE_LONG_LONG or LONG_LONG.
32 * This means q! and Q! always mean the standard long long type and
33 * cause ArgumentError on platforms which have no long long type,
34 * even if the platform has an implementation specific 64bit type.
35 * This behavior is consistent with the document of pack/unpack.
37 #ifdef HAVE_TRUE_LONG_LONG
38 static const char natstr
[] = "sSiIlLqQjJ";
39 # define endstr natstr
41 static const char natstr
[] = "sSiIlLjJ";
42 static const char endstr
[] = "sSiIlLqQjJ";
45 #ifdef HAVE_TRUE_LONG_LONG
46 /* It is intentional to use long long instead of LONG_LONG. */
47 # define NATINT_LEN_Q NATINT_LEN(long long, 8)
49 # define NATINT_LEN_Q 8
52 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
57 /* for universal binary of NEXTSTEP and MacOS X */
58 /* useless since autoconf 2.63? */
63 static int endian_value
;
66 if (init
) return endian_value
;
69 return endian_value
= p
[0]?0:1;
71 # define BIGENDIAN_P() (is_bigendian())
72 #elif defined(WORDS_BIGENDIAN)
73 # define BIGENDIAN_P() 1
75 # define BIGENDIAN_P() 0
79 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
81 # define NATINT_LEN(type,len) ((int)sizeof(type))
94 #define swapf(x) swap32(x)
95 #define swapd(x) swap64(x)
97 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
98 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
99 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
100 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
101 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
102 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
103 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
104 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
106 #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
107 #define HTONF(x) ((x).u = rb_htonf((x).u))
108 #define HTOVF(x) ((x).u = rb_htovf((x).u))
109 #define NTOHF(x) ((x).u = rb_ntohf((x).u))
110 #define VTOHF(x) ((x).u = rb_vtohf((x).u))
112 #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
113 #define HTOND(x) ((x).u = rb_htond((x).u))
114 #define HTOVD(x) ((x).u = rb_htovd((x).u))
115 #define NTOHD(x) ((x).u = rb_ntohd((x).u))
116 #define VTOHD(x) ((x).u = rb_vtohd((x).u))
118 #define MAX_INTEGER_PACK_SIZE 8
120 static const char toofew
[] = "too few arguments";
122 static void encodes(VALUE
,const char*,long,int,int);
123 static void qpencode(VALUE
,VALUE
,long);
125 static unsigned long utf8_to_uv(const char*,long*);
127 static ID id_associated
;
130 str_associate(VALUE str
, VALUE add
)
132 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
133 rb_ivar_set(str
, id_associated
, add
);
137 str_associated(VALUE str
)
139 VALUE associates
= rb_ivar_lookup(str
, id_associated
, Qfalse
);
141 rb_raise(rb_eArgError
, "no associated pointer");
146 associated_pointer(VALUE associates
, const char *t
)
148 const VALUE
*p
= RARRAY_CONST_PTR(associates
);
149 const VALUE
*pend
= p
+ RARRAY_LEN(associates
);
150 for (; p
< pend
; p
++) {
152 if (RB_TYPE_P(tmp
, T_STRING
) && RSTRING_PTR(tmp
) == t
) return tmp
;
154 rb_raise(rb_eArgError
, "non associated pointer");
155 UNREACHABLE_RETURN(Qnil
);
158 RBIMPL_ATTR_NORETURN()
160 unknown_directive(const char *mode
, char type
, VALUE fmt
)
169 snprintf(unknown
, sizeof(unknown
), "\\x%.2x", type
& 0xff);
171 fmt
= rb_str_quote_unprintable(fmt
);
172 rb_raise(rb_eArgError
, "unknown %s directive '%s' in '%"PRIsVALUE
"'",
177 VALUE_to_float(VALUE obj
)
179 VALUE v
= rb_to_float(obj
);
180 double d
= RFLOAT_VALUE(v
);
185 else if (d
< -FLT_MAX
) {
188 else if (d
<= FLT_MAX
) {
197 str_expand_fill(VALUE res
, int c
, long len
)
199 long olen
= RSTRING_LEN(res
);
200 memset(RSTRING_PTR(res
) + olen
, c
, len
);
201 rb_str_set_len(res
, olen
+ len
);
/*
 * Return a pointer just past the first '\n' in the range [p, pend), or
 * `pend` itself when the range contains no newline.
 *
 * The skip_blank() macro uses this to step over the remainder of a '#'
 * comment in a format string.
 *
 * The result is a plain `char *` even though both inputs are
 * const-qualified: the cast only drops the qualifier, and nothing is
 * written through the pointer here.
 */
static char *
skip_to_eol(const char *p, const char *pend)
{
    p = memchr(p, '\n', pend - p);
    return (char *)(p ? p + 1 : pend);
}
211 #define skip_blank(p, type) \
212 (ISSPACE(type) || (type == '#' && (p = skip_to_eol(p, pend), 1)))
215 # define pack_modifiers(p, t, n, e) pack_modifiers(p, t, e)
218 pack_modifiers(const char *p
, char type
, int *natint
, int *explicit_endian
)
224 if (strchr(natstr
, type
)) {
231 rb_raise(rb_eArgError
, "'%c' allowed only after types %s", *p
, natstr
);
237 if (!strchr(endstr
, type
)) {
238 rb_raise(rb_eArgError
, "'%c' allowed only after types %s", *p
, endstr
);
240 if (*explicit_endian
) {
241 rb_raise(rb_eRangeError
, "Can't use both '<' and '>'");
243 *explicit_endian
= *p
++;
252 pack_pack(rb_execution_context_t
*ec
, VALUE ary
, VALUE fmt
, VALUE buffer
)
254 const char *p
, *pend
;
255 VALUE res
, from
, associates
= 0;
258 int enc_info
= 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
259 int integer_size
, bigendian_p
;
262 rb_must_asciicompat(fmt
);
263 p
= RSTRING_PTR(fmt
);
264 pend
= p
+ RSTRING_LEN(fmt
);
267 res
= rb_str_buf_new(0);
270 if (!RB_TYPE_P(buffer
, T_STRING
))
271 rb_raise(rb_eTypeError
, "buffer must be String, not %s", rb_obj_classname(buffer
));
272 rb_str_modify(buffer
);
278 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
279 #define MORE_ITEM (idx < RARRAY_LEN(ary))
280 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
281 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
284 int explicit_endian
= 0;
285 if (RSTRING_END(fmt
) != pend
) {
286 rb_raise(rb_eRuntimeError
, "format string modified");
288 const char type
= *p
++; /* get data type */
290 int natint
= 0; /* native integer */
293 if (skip_blank(p
, type
)) continue;
294 p
= pack_modifiers(p
, type
, &natint
, &explicit_endian
);
296 if (*p
== '*') { /* set data length */
297 len
= strchr("@Xxu", type
) ? 0
298 : strchr("PMm", type
) ? 1
299 : RARRAY_LEN(ary
) - idx
;
302 else if (ISDIGIT(*p
)) {
304 len
= STRTOUL(p
, (char**)&p
, 10);
306 rb_raise(rb_eRangeError
, "pack length too big");
315 /* if encoding is US-ASCII, upgrade to UTF-8 */
316 if (enc_info
== 1) enc_info
= 2;
318 case 'm': case 'M': case 'u':
319 /* keep US-ASCII (do nothing) */
322 /* fall back to BINARY */
327 case 'A': case 'a': case 'Z':
337 ptr
= RSTRING_PTR(from
);
338 plen
= RSTRING_LEN(from
);
345 case 'a': /* arbitrary binary string (null padded) */
346 case 'A': /* arbitrary binary string (ASCII space padded) */
347 case 'Z': /* null terminated string */
349 rb_str_buf_cat(res
, ptr
, len
);
350 if (p
[-1] == '*' && type
== 'Z')
351 rb_str_buf_cat(res
, "", 1);
354 rb_str_modify_expand(res
, len
);
355 rb_str_buf_cat(res
, ptr
, plen
);
356 str_expand_fill(res
, (type
== 'A' ? ' ' : '\0'), len
- plen
);
360 #define castchar(from) (char)((from) & 0xff)
362 case 'b': /* bit string (ascending) */
368 j
= (len
- plen
+ 1)/2;
371 for (i
=0; i
++ < len
; ptr
++) {
377 char c
= castchar(byte
);
378 rb_str_buf_cat(res
, &c
, 1);
384 byte
>>= 7 - (len
& 7);
386 rb_str_buf_cat(res
, &c
, 1);
393 case 'B': /* bit string (descending) */
399 j
= (len
- plen
+ 1)/2;
402 for (i
=0; i
++ < len
; ptr
++) {
407 char c
= castchar(byte
);
408 rb_str_buf_cat(res
, &c
, 1);
414 byte
<<= 7 - (len
& 7);
416 rb_str_buf_cat(res
, &c
, 1);
423 case 'h': /* hex string (low nibble first) */
429 j
= (len
+ 1) / 2 - (plen
+ 1) / 2;
432 for (i
=0; i
++ < len
; ptr
++) {
434 byte
|= (((*ptr
& 15) + 9) & 15) << 4;
436 byte
|= (*ptr
& 15) << 4;
440 char c
= castchar(byte
);
441 rb_str_buf_cat(res
, &c
, 1);
446 char c
= castchar(byte
);
447 rb_str_buf_cat(res
, &c
, 1);
454 case 'H': /* hex string (high nibble first) */
460 j
= (len
+ 1) / 2 - (plen
+ 1) / 2;
463 for (i
=0; i
++ < len
; ptr
++) {
465 byte
|= ((*ptr
& 15) + 9) & 15;
471 char c
= castchar(byte
);
472 rb_str_buf_cat(res
, &c
, 1);
477 char c
= castchar(byte
);
478 rb_str_buf_cat(res
, &c
, 1);
487 case 'c': /* signed char */
488 case 'C': /* unsigned char */
490 bigendian_p
= BIGENDIAN_P(); /* not effective */
493 case 's': /* s for int16_t, s! for signed short */
494 case 'S': /* S for uint16_t, S! for unsigned short */
495 integer_size
= NATINT_LEN(short, 2);
496 bigendian_p
= BIGENDIAN_P();
499 case 'i': /* i and i! for signed int */
500 case 'I': /* I and I! for unsigned int */
501 integer_size
= (int)sizeof(int);
502 bigendian_p
= BIGENDIAN_P();
505 case 'l': /* l for int32_t, l! for signed long */
506 case 'L': /* L for uint32_t, L! for unsigned long */
507 integer_size
= NATINT_LEN(long, 4);
508 bigendian_p
= BIGENDIAN_P();
511 case 'q': /* q for int64_t, q! for signed long long */
512 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
513 integer_size
= NATINT_LEN_Q
;
514 bigendian_p
= BIGENDIAN_P();
517 case 'j': /* j for intptr_t */
518 integer_size
= sizeof(intptr_t);
519 bigendian_p
= BIGENDIAN_P();
522 case 'J': /* J for uintptr_t */
523 integer_size
= sizeof(uintptr_t);
524 bigendian_p
= BIGENDIAN_P();
527 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
532 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
537 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
542 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
548 if (explicit_endian
) {
549 bigendian_p
= explicit_endian
== '>';
551 if (integer_size
> MAX_INTEGER_PACK_SIZE
)
552 rb_bug("unexpected integer size for pack: %d", integer_size
);
554 char intbuf
[MAX_INTEGER_PACK_SIZE
];
557 rb_integer_pack(from
, intbuf
, integer_size
, 1, 0,
559 (bigendian_p
? INTEGER_PACK_BIG_ENDIAN
: INTEGER_PACK_LITTLE_ENDIAN
));
560 rb_str_buf_cat(res
, intbuf
, integer_size
);
564 case 'f': /* single precision float in native format */
565 case 'F': /* ditto */
570 f
= VALUE_to_float(from
);
571 rb_str_buf_cat(res
, (char*)&f
, sizeof(float));
575 case 'e': /* single precision float in VAX byte-order */
580 tmp
.f
= VALUE_to_float(from
);
582 rb_str_buf_cat(res
, tmp
.buf
, sizeof(float));
586 case 'E': /* double precision float in VAX byte-order */
588 DOUBLE_CONVWITH(tmp
);
590 tmp
.d
= RFLOAT_VALUE(rb_to_float(from
));
592 rb_str_buf_cat(res
, tmp
.buf
, sizeof(double));
596 case 'd': /* double precision float in native format */
597 case 'D': /* ditto */
602 d
= RFLOAT_VALUE(rb_to_float(from
));
603 rb_str_buf_cat(res
, (char*)&d
, sizeof(double));
607 case 'g': /* single precision float in network byte-order */
611 tmp
.f
= VALUE_to_float(from
);
613 rb_str_buf_cat(res
, tmp
.buf
, sizeof(float));
617 case 'G': /* double precision float in network byte-order */
619 DOUBLE_CONVWITH(tmp
);
622 tmp
.d
= RFLOAT_VALUE(rb_to_float(from
));
624 rb_str_buf_cat(res
, tmp
.buf
, sizeof(double));
628 case 'x': /* null byte */
630 rb_str_modify_expand(res
, len
);
631 str_expand_fill(res
, '\0', len
);
634 case 'X': /* back up byte */
636 plen
= RSTRING_LEN(res
);
638 rb_raise(rb_eArgError
, "X outside of string");
639 rb_str_set_len(res
, plen
- len
);
642 case '@': /* null fill to absolute position */
643 len
-= RSTRING_LEN(res
);
644 if (len
> 0) goto grow
;
646 if (len
> 0) goto shrink
;
650 rb_raise(rb_eArgError
, "%% is not supported");
653 case 'U': /* Unicode character */
660 from
= rb_to_int(from
);
663 rb_raise(rb_eRangeError
, "pack(U): value out of range");
665 le
= rb_uv_to_utf8(buf
, l
);
666 rb_str_buf_cat(res
, (char*)buf
, le
);
670 case 'u': /* uuencoded string */
671 case 'm': /* base64 encoded string */
674 ptr
= RSTRING_PTR(from
);
675 plen
= RSTRING_LEN(from
);
677 if (len
== 0 && type
== 'm') {
678 encodes(res
, ptr
, plen
, type
, 0);
684 else if (len
> 63 && type
== 'u')
695 encodes(res
, ptr
, todo
, type
, 1);
701 case 'M': /* quoted-printable encoded string */
702 from
= rb_obj_as_string(NEXTFROM
);
705 qpencode(res
, from
, len
);
708 case 'P': /* pointer to packed byte string */
712 if (RSTRING_LEN(from
) < len
) {
713 rb_raise(rb_eArgError
, "too short buffer for P(%ld for %ld)",
714 RSTRING_LEN(from
), len
);
719 case 'p': /* pointer to string */
727 t
= StringValuePtr(from
);
730 associates
= rb_ary_new();
732 rb_ary_push(associates
, from
);
733 rb_str_buf_cat(res
, (char*)&t
, sizeof(char*));
737 case 'w': /* BER compressed integer */
739 VALUE buf
= rb_str_new(0, 0);
745 from
= rb_to_int(from
);
746 numbytes
= rb_absint_numwords(from
, 7, NULL
);
749 buf
= rb_str_new(NULL
, numbytes
);
751 sign
= rb_integer_pack(from
, RSTRING_PTR(buf
), RSTRING_LEN(buf
), 1, 1, INTEGER_PACK_BIG_ENDIAN
);
754 rb_raise(rb_eArgError
, "can't compress negative numbers");
756 rb_bug("buffer size problem?");
758 cp
= RSTRING_PTR(buf
);
759 while (1 < numbytes
) {
765 rb_str_buf_cat(res
, RSTRING_PTR(buf
), RSTRING_LEN(buf
));
770 unknown_directive("pack", type
, fmt
);
777 str_associate(res
, associates
);
781 ENCODING_CODERANGE_SET(res
, rb_usascii_encindex(), ENC_CODERANGE_7BIT
);
784 rb_enc_set_index(res
, rb_utf8_encindex());
787 /* do nothing, keep ASCII-8BIT */
/*
 * Thin forwarding entry point: hands ec, ary, fmt and buffer to pack_pack()
 * unchanged and returns whatever pack_pack() returns.
 * NOTE(review): the return-type line and the braces of this definition are
 * not present in this extract -- confirm them against the real file.
 */
794 rb_ec_pack_ary(rb_execution_context_t
*ec
, VALUE ary
, VALUE fmt
, VALUE buffer
)
796 return pack_pack(ec
, ary
, fmt
, buffer
);
799 static const char uu_table
[] =
800 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
801 static const char b64_table
[] =
802 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
805 encodes(VALUE str
, const char *s0
, long len
, int type
, int tail_lf
)
807 enum {buff_size
= 4096, encoded_unit
= 4, input_unit
= 3};
808 char buff
[buff_size
+ 1]; /* +1 for tail_lf */
810 const char *const trans
= type
== 'u' ? uu_table
: b64_table
;
812 const unsigned char *s
= (const unsigned char *)s0
;
815 buff
[i
++] = (char)len
+ ' ';
821 while (len
>= input_unit
) {
822 while (len
>= input_unit
&& buff_size
-i
>= encoded_unit
) {
823 buff
[i
++] = trans
[077 & (*s
>> 2)];
824 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | ((s
[1] >> 4) & 017))];
825 buff
[i
++] = trans
[077 & (((s
[1] << 2) & 074) | ((s
[2] >> 6) & 03))];
826 buff
[i
++] = trans
[077 & s
[2]];
830 if (buff_size
-i
< encoded_unit
) {
831 rb_str_buf_cat(str
, buff
, i
);
837 buff
[i
++] = trans
[077 & (*s
>> 2)];
838 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | ((s
[1] >> 4) & 017))];
839 buff
[i
++] = trans
[077 & (((s
[1] << 2) & 074) | (('\0' >> 6) & 03))];
843 buff
[i
++] = trans
[077 & (*s
>> 2)];
844 buff
[i
++] = trans
[077 & (((*s
<< 4) & 060) | (('\0' >> 4) & 017))];
848 if (tail_lf
) buff
[i
++] = '\n';
849 rb_str_buf_cat(str
, buff
, i
);
850 if ((size_t)i
> sizeof(buff
)) rb_bug("encodes() buffer overrun");
853 static const char hex_table
[] = "0123456789ABCDEF";
856 qpencode(VALUE str
, VALUE from
, long len
)
859 long i
= 0, n
= 0, prev
= EOF
;
860 unsigned char *s
= (unsigned char*)RSTRING_PTR(from
);
861 unsigned char *send
= s
+ RSTRING_LEN(from
);
865 (*s
< 32 && *s
!= '\n' && *s
!= '\t') ||
868 buff
[i
++] = hex_table
[*s
>> 4];
869 buff
[i
++] = hex_table
[*s
& 0x0f];
873 else if (*s
== '\n') {
874 if (prev
== ' ' || prev
== '\t') {
894 rb_str_buf_cat(str
, buff
, i
);
904 rb_str_buf_cat(str
, buff
, i
);
912 n
= ruby_digit36_to_number_table
[(unsigned char)c
];
918 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
920 if (len > (long)((send-s)/(sz))) { \
922 tmp_len = len-(send-s)/(sz); \
924 len = (send-s)/(sz); \
928 #define PACK_ITEM_ADJUST() do { \
929 if (tmp_len > 0 && mode == UNPACK_ARRAY) \
930 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
933 /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
934 * 12.4/12.5/12.6 C compiler optimization bug
935 * with "-xO4" optimization option.
937 #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
938 # define AVOID_CC_BUG volatile
940 # define AVOID_CC_BUG
950 pack_unpack_internal(VALUE str
, VALUE fmt
, enum unpack_mode mode
, long offset
)
952 #define hexdigits ruby_hexdigits
955 VALUE ary
, associates
= Qfalse
;
957 AVOID_CC_BUG
long tmp_len
;
958 int signed_p
, integer_size
, bigendian_p
;
959 #define UNPACK_PUSH(item) do {\
960 VALUE item_val = (item);\
961 if ((mode) == UNPACK_BLOCK) {\
964 else if ((mode) == UNPACK_ARRAY) {\
965 rb_ary_push(ary, item_val);\
967 else /* if ((mode) == UNPACK_1) { */ {\
974 rb_must_asciicompat(fmt
);
976 if (offset
< 0) rb_raise(rb_eArgError
, "offset can't be negative");
977 len
= RSTRING_LEN(str
);
978 if (offset
> len
) rb_raise(rb_eArgError
, "offset outside of string");
980 s
= RSTRING_PTR(str
);
984 p
= RSTRING_PTR(fmt
);
985 pend
= p
+ RSTRING_LEN(fmt
);
987 #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
989 ary
= mode
== UNPACK_ARRAY
? rb_ary_new() : Qnil
;
991 int explicit_endian
= 0;
992 const char type
= *p
++;
994 int natint
= 0; /* native integer */
998 if (skip_blank(p
, type
)) continue;
999 p
= pack_modifiers(p
, type
, &natint
, &explicit_endian
);
1003 else if (*p
== '*') {
1008 else if (ISDIGIT(*p
)) {
1010 len
= STRTOUL(p
, (char**)&p
, 10);
1011 if (len
< 0 || errno
) {
1012 rb_raise(rb_eRangeError
, "pack length too big");
1016 len
= (type
!= '@');
1021 rb_raise(rb_eArgError
, "%% is not supported");
1025 if (len
> send
- s
) len
= send
- s
;
1028 char *t
= s
+ len
- 1;
1031 if (*t
!= ' ' && *t
!= '\0') break;
1034 UNPACK_PUSH(rb_str_new(s
, len
));
1043 if (len
> send
-s
) len
= send
-s
;
1044 while (t
< s
+len
&& *t
) t
++;
1045 UNPACK_PUSH(rb_str_new(s
, t
-s
));
1047 s
= star
? t
: s
+len
;
1052 if (len
> send
- s
) len
= send
- s
;
1053 UNPACK_PUSH(rb_str_new(s
, len
));
1064 if (p
[-1] == '*' || len
> (send
- s
) * 8)
1065 len
= (send
- s
) * 8;
1067 bitstr
= rb_usascii_str_new(0, len
);
1068 t
= RSTRING_PTR(bitstr
);
1069 for (i
=0; i
<len
; i
++) {
1070 if (i
& 7) bits
>>= 1;
1071 else bits
= (unsigned char)*s
++;
1072 *t
++ = (bits
& 1) ? '1' : '0';
1074 UNPACK_PUSH(bitstr
);
1085 if (p
[-1] == '*' || len
> (send
- s
) * 8)
1086 len
= (send
- s
) * 8;
1088 bitstr
= rb_usascii_str_new(0, len
);
1089 t
= RSTRING_PTR(bitstr
);
1090 for (i
=0; i
<len
; i
++) {
1091 if (i
& 7) bits
<<= 1;
1092 else bits
= (unsigned char)*s
++;
1093 *t
++ = (bits
& 128) ? '1' : '0';
1095 UNPACK_PUSH(bitstr
);
1106 if (p
[-1] == '*' || len
> (send
- s
) * 2)
1107 len
= (send
- s
) * 2;
1109 bitstr
= rb_usascii_str_new(0, len
);
1110 t
= RSTRING_PTR(bitstr
);
1111 for (i
=0; i
<len
; i
++) {
1115 bits
= (unsigned char)*s
++;
1116 *t
++ = hexdigits
[bits
& 15];
1118 UNPACK_PUSH(bitstr
);
1129 if (p
[-1] == '*' || len
> (send
- s
) * 2)
1130 len
= (send
- s
) * 2;
1132 bitstr
= rb_usascii_str_new(0, len
);
1133 t
= RSTRING_PTR(bitstr
);
1134 for (i
=0; i
<len
; i
++) {
1138 bits
= (unsigned char)*s
++;
1139 *t
++ = hexdigits
[(bits
>> 4) & 15];
1141 UNPACK_PUSH(bitstr
);
1148 bigendian_p
= BIGENDIAN_P(); /* not effective */
1149 goto unpack_integer
;
1154 bigendian_p
= BIGENDIAN_P(); /* not effective */
1155 goto unpack_integer
;
1159 integer_size
= NATINT_LEN(short, 2);
1160 bigendian_p
= BIGENDIAN_P();
1161 goto unpack_integer
;
1165 integer_size
= NATINT_LEN(short, 2);
1166 bigendian_p
= BIGENDIAN_P();
1167 goto unpack_integer
;
1171 integer_size
= (int)sizeof(int);
1172 bigendian_p
= BIGENDIAN_P();
1173 goto unpack_integer
;
1177 integer_size
= (int)sizeof(int);
1178 bigendian_p
= BIGENDIAN_P();
1179 goto unpack_integer
;
1183 integer_size
= NATINT_LEN(long, 4);
1184 bigendian_p
= BIGENDIAN_P();
1185 goto unpack_integer
;
1189 integer_size
= NATINT_LEN(long, 4);
1190 bigendian_p
= BIGENDIAN_P();
1191 goto unpack_integer
;
1195 integer_size
= NATINT_LEN_Q
;
1196 bigendian_p
= BIGENDIAN_P();
1197 goto unpack_integer
;
1201 integer_size
= NATINT_LEN_Q
;
1202 bigendian_p
= BIGENDIAN_P();
1203 goto unpack_integer
;
1207 integer_size
= sizeof(intptr_t);
1208 bigendian_p
= BIGENDIAN_P();
1209 goto unpack_integer
;
1213 integer_size
= sizeof(uintptr_t);
1214 bigendian_p
= BIGENDIAN_P();
1215 goto unpack_integer
;
1221 goto unpack_integer
;
1227 goto unpack_integer
;
1233 goto unpack_integer
;
1239 goto unpack_integer
;
1242 if (explicit_endian
) {
1243 bigendian_p
= explicit_endian
== '>';
1245 PACK_LENGTH_ADJUST_SIZE(integer_size
);
1247 int flags
= bigendian_p
? INTEGER_PACK_BIG_ENDIAN
: INTEGER_PACK_LITTLE_ENDIAN
;
1250 flags
|= INTEGER_PACK_2COMP
;
1251 val
= rb_integer_unpack(s
, integer_size
, 1, 0, flags
);
1260 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1263 UNPACK_FETCH(&tmp
, float);
1264 UNPACK_PUSH(DBL2NUM((double)tmp
));
1270 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1272 FLOAT_CONVWITH(tmp
);
1273 UNPACK_FETCH(tmp
.buf
, float);
1275 UNPACK_PUSH(DBL2NUM(tmp
.f
));
1281 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1283 DOUBLE_CONVWITH(tmp
);
1284 UNPACK_FETCH(tmp
.buf
, double);
1286 UNPACK_PUSH(DBL2NUM(tmp
.d
));
1293 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1296 UNPACK_FETCH(&tmp
, double);
1297 UNPACK_PUSH(DBL2NUM(tmp
));
1303 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1305 FLOAT_CONVWITH(tmp
);
1306 UNPACK_FETCH(tmp
.buf
, float);
1308 UNPACK_PUSH(DBL2NUM(tmp
.f
));
1314 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1316 DOUBLE_CONVWITH(tmp
);
1317 UNPACK_FETCH(tmp
.buf
, double);
1319 UNPACK_PUSH(DBL2NUM(tmp
.d
));
1325 if (len
> send
- s
) len
= send
- s
;
1326 while (len
> 0 && s
< send
) {
1327 long alen
= send
- s
;
1330 l
= utf8_to_uv(s
, &alen
);
1332 UNPACK_PUSH(ULONG2NUM(l
));
1338 VALUE buf
= rb_str_new(0, (send
- s
)*3/4);
1339 char *ptr
= RSTRING_PTR(buf
);
1342 while (s
< send
&& (unsigned char)*s
> ' ' && (unsigned char)*s
< 'a') {
1346 len
= ((unsigned char)*s
++ - ' ') & 077;
1349 if (total
> RSTRING_LEN(buf
)) {
1350 len
-= total
- RSTRING_LEN(buf
);
1351 total
= RSTRING_LEN(buf
);
1355 long mlen
= len
> 3 ? 3 : len
;
1357 if (s
< send
&& (unsigned char)*s
>= ' ' && (unsigned char)*s
< 'a')
1358 a
= ((unsigned char)*s
++ - ' ') & 077;
1361 if (s
< send
&& (unsigned char)*s
>= ' ' && (unsigned char)*s
< 'a')
1362 b
= ((unsigned char)*s
++ - ' ') & 077;
1365 if (s
< send
&& (unsigned char)*s
>= ' ' && (unsigned char)*s
< 'a')
1366 c
= ((unsigned char)*s
++ - ' ') & 077;
1369 if (s
< send
&& (unsigned char)*s
>= ' ' && (unsigned char)*s
< 'a')
1370 d
= ((unsigned char)*s
++ - ' ') & 077;
1373 hunk
[0] = (char)(a
<< 2 | b
>> 4);
1374 hunk
[1] = (char)(b
<< 4 | c
>> 2);
1375 hunk
[2] = (char)(c
<< 6 | d
);
1376 memcpy(ptr
, hunk
, mlen
);
1380 if (s
< send
&& (unsigned char)*s
!= '\r' && *s
!= '\n')
1381 s
++; /* possible checksum byte */
1382 if (s
< send
&& *s
== '\r') s
++;
1383 if (s
< send
&& *s
== '\n') s
++;
1386 rb_str_set_len(buf
, total
);
1393 VALUE buf
= rb_str_new(0, (send
- s
+ 3)*3/4); /* +3 is for skipping paddings */
1394 char *ptr
= RSTRING_PTR(buf
);
1395 int a
= -1,b
= -1,c
= 0,d
= 0;
1396 static signed char b64_xtable
[256];
1398 if (b64_xtable
['/'] <= 0) {
1401 for (i
= 0; i
< 256; i
++) {
1404 for (i
= 0; i
< 64; i
++) {
1405 b64_xtable
[(unsigned char)b64_table
[i
]] = (char)i
;
1411 a
= b64_xtable
[(unsigned char)*s
++];
1412 if (s
>= send
|| a
== -1) rb_raise(rb_eArgError
, "invalid base64");
1413 b
= b64_xtable
[(unsigned char)*s
++];
1414 if (s
>= send
|| b
== -1) rb_raise(rb_eArgError
, "invalid base64");
1416 if (s
+ 2 == send
&& *(s
+ 1) == '=') break;
1417 rb_raise(rb_eArgError
, "invalid base64");
1419 c
= b64_xtable
[(unsigned char)*s
++];
1420 if (s
>= send
|| c
== -1) rb_raise(rb_eArgError
, "invalid base64");
1421 if (s
+ 1 == send
&& *s
== '=') break;
1422 d
= b64_xtable
[(unsigned char)*s
++];
1423 if (d
== -1) rb_raise(rb_eArgError
, "invalid base64");
1424 *ptr
++ = castchar(a
<< 2 | b
>> 4);
1425 *ptr
++ = castchar(b
<< 4 | c
>> 2);
1426 *ptr
++ = castchar(c
<< 6 | d
);
1429 *ptr
++ = castchar(a
<< 2 | b
>> 4);
1430 if (b
& 0xf) rb_raise(rb_eArgError
, "invalid base64");
1433 *ptr
++ = castchar(a
<< 2 | b
>> 4);
1434 *ptr
++ = castchar(b
<< 4 | c
>> 2);
1435 if (c
& 0x3) rb_raise(rb_eArgError
, "invalid base64");
1441 while ((a
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {s
++;}
1442 if (s
>= send
) break;
1444 while ((b
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {s
++;}
1445 if (s
>= send
) break;
1447 while ((c
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {if (*s
== '=') break; s
++;}
1448 if (*s
== '=' || s
>= send
) break;
1450 while ((d
= b64_xtable
[(unsigned char)*s
]) == -1 && s
< send
) {if (*s
== '=') break; s
++;}
1451 if (*s
== '=' || s
>= send
) break;
1453 *ptr
++ = castchar(a
<< 2 | b
>> 4);
1454 *ptr
++ = castchar(b
<< 4 | c
>> 2);
1455 *ptr
++ = castchar(c
<< 6 | d
);
1458 if (a
!= -1 && b
!= -1) {
1460 *ptr
++ = castchar(a
<< 2 | b
>> 4);
1462 *ptr
++ = castchar(a
<< 2 | b
>> 4);
1463 *ptr
++ = castchar(b
<< 4 | c
>> 2);
1467 rb_str_set_len(buf
, ptr
- RSTRING_PTR(buf
));
1474 VALUE buf
= rb_str_new(0, send
- s
);
1475 char *ptr
= RSTRING_PTR(buf
), *ss
= s
;
1481 if (++s
== send
) break;
1482 if (s
+1 < send
&& *s
== '\r' && *(s
+1) == '\n')
1485 if ((c1
= hex2num(*s
)) == -1) break;
1486 if (++s
== send
) break;
1487 if ((c2
= hex2num(*s
)) == -1) break;
1488 csum
|= *ptr
++ = castchar(c1
<< 4 | c2
);
1492 csum
|= *ptr
++ = *s
;
1497 rb_str_set_len(buf
, ptr
- RSTRING_PTR(buf
));
1498 rb_str_buf_cat(buf
, ss
, send
-ss
);
1499 csum
= ISASCII(csum
) ? ENC_CODERANGE_7BIT
: ENC_CODERANGE_VALID
;
1500 ENCODING_CODERANGE_SET(buf
, rb_ascii8bit_encindex(), csum
);
1506 if (len
> RSTRING_LEN(str
))
1507 rb_raise(rb_eArgError
, "@ outside of string");
1508 s
= RSTRING_PTR(str
) + len
;
1512 if (len
> s
- RSTRING_PTR(str
))
1513 rb_raise(rb_eArgError
, "X outside of string");
1519 rb_raise(rb_eArgError
, "x outside of string");
1524 if (sizeof(char *) <= (size_t)(send
- s
)) {
1528 UNPACK_FETCH(&t
, char *);
1530 if (!associates
) associates
= str_associated(str
);
1531 tmp
= associated_pointer(associates
, t
);
1532 if (len
< RSTRING_LEN(tmp
)) {
1533 tmp
= rb_str_new(t
, len
);
1534 str_associate(tmp
, associates
);
1542 if (len
> (long)((send
- s
) / sizeof(char *)))
1543 len
= (send
- s
) / sizeof(char *);
1545 if ((size_t)(send
- s
) < sizeof(char *))
1551 UNPACK_FETCH(&t
, char *);
1553 if (!associates
) associates
= str_associated(str
);
1554 tmp
= associated_pointer(associates
, t
);
1564 while (len
> 0 && s
< send
) {
1570 UNPACK_PUSH(rb_integer_unpack(s0
, s
-s0
, 1, 1, INTEGER_PACK_BIG_ENDIAN
));
1579 unknown_directive("unpack", type
, fmt
);
/*
 * Entry point that delegates to pack_unpack_internal().
 * The mode is UNPACK_BLOCK when rb_block_given_p() is true and UNPACK_ARRAY
 * otherwise; `offset` is converted with RB_NUM2LONG before being passed on.
 * `ec` is not referenced in the visible lines.
 * NOTE(review): the return-type line and the braces of this definition are
 * not present in this extract -- confirm them against the real file.
 */
1588 pack_unpack(rb_execution_context_t
*ec
, VALUE str
, VALUE fmt
, VALUE offset
)
1590 enum unpack_mode mode
= rb_block_given_p() ? UNPACK_BLOCK
: UNPACK_ARRAY
;
1591 return pack_unpack_internal(str
, fmt
, mode
, RB_NUM2LONG(offset
));
/*
 * Entry point that delegates to pack_unpack_internal() with the fixed mode
 * UNPACK_1; `offset` is converted with RB_NUM2LONG before being passed on.
 * `ec` is not referenced in the visible lines.
 * NOTE(review): the return-type line and the braces of this definition are
 * not present in this extract -- confirm them against the real file.
 */
1595 pack_unpack1(rb_execution_context_t
*ec
, VALUE str
, VALUE fmt
, VALUE offset
)
1597 return pack_unpack_internal(str
, fmt
, UNPACK_1
, RB_NUM2LONG(offset
));
1601 rb_uv_to_utf8(char buf
[6], unsigned long uv
)
1608 buf
[0] = castchar(((uv
>>6)&0xff)|0xc0);
1609 buf
[1] = castchar((uv
&0x3f)|0x80);
1613 buf
[0] = castchar(((uv
>>12)&0xff)|0xe0);
1614 buf
[1] = castchar(((uv
>>6)&0x3f)|0x80);
1615 buf
[2] = castchar((uv
&0x3f)|0x80);
1618 if (uv
<= 0x1fffff) {
1619 buf
[0] = castchar(((uv
>>18)&0xff)|0xf0);
1620 buf
[1] = castchar(((uv
>>12)&0x3f)|0x80);
1621 buf
[2] = castchar(((uv
>>6)&0x3f)|0x80);
1622 buf
[3] = castchar((uv
&0x3f)|0x80);
1625 if (uv
<= 0x3ffffff) {
1626 buf
[0] = castchar(((uv
>>24)&0xff)|0xf8);
1627 buf
[1] = castchar(((uv
>>18)&0x3f)|0x80);
1628 buf
[2] = castchar(((uv
>>12)&0x3f)|0x80);
1629 buf
[3] = castchar(((uv
>>6)&0x3f)|0x80);
1630 buf
[4] = castchar((uv
&0x3f)|0x80);
1633 if (uv
<= 0x7fffffff) {
1634 buf
[0] = castchar(((uv
>>30)&0xff)|0xfc);
1635 buf
[1] = castchar(((uv
>>24)&0x3f)|0x80);
1636 buf
[2] = castchar(((uv
>>18)&0x3f)|0x80);
1637 buf
[3] = castchar(((uv
>>12)&0x3f)|0x80);
1638 buf
[4] = castchar(((uv
>>6)&0x3f)|0x80);
1639 buf
[5] = castchar((uv
&0x3f)|0x80);
1642 rb_raise(rb_eRangeError
, "pack(U): value out of range");
1644 UNREACHABLE_RETURN(Qnil
);
1647 static const unsigned long utf8_limits
[] = {
1657 static unsigned long
1658 utf8_to_uv(const char *p
, long *lenp
)
1660 int c
= *p
++ & 0xff;
1661 unsigned long uv
= c
;
1670 rb_raise(rb_eArgError
, "malformed UTF-8 character");
1673 if (!(uv
& 0x20)) { n
= 2; uv
&= 0x1f; }
1674 else if (!(uv
& 0x10)) { n
= 3; uv
&= 0x0f; }
1675 else if (!(uv
& 0x08)) { n
= 4; uv
&= 0x07; }
1676 else if (!(uv
& 0x04)) { n
= 5; uv
&= 0x03; }
1677 else if (!(uv
& 0x02)) { n
= 6; uv
&= 0x01; }
1680 rb_raise(rb_eArgError
, "malformed UTF-8 character");
1683 rb_raise(rb_eArgError
, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1690 if ((c
& 0xc0) != 0x80) {
1692 rb_raise(rb_eArgError
, "malformed UTF-8 character");
1701 if (uv
< utf8_limits
[n
]) {
1702 rb_raise(rb_eArgError
, "redundant UTF-8 sequence");
1707 #include "pack.rbinc"
1712 id_associated
= rb_make_internal_id();