@@@ -77,7 -77,7 +77,7 @@@ check_little_endian(void
static unsigned int
hex2int(unsigned char ch)
{
- if (ch >= '0' && ch <= '9')
+ if (ch >= '0' && ch <= '9')
return ch - '0';
else if (ch >= 'A' && ch <= 'F')
return 10 + (ch - 'A');
@@@ -414,8 -414,12 +414,12 @@@ pack_utf8(mrb_state *mrb, mrb_value o,
{
char utf8[4];
int len;
-
- unsigned long c = mrb_fixnum(o);
+ unsigned long c = 0;
+
+ if (mrb_float_p(o)) {
+ goto range_error;
+ }
+ c = mrb_fixnum(o);
/* Unicode character */
/* from mruby-compiler gem */
utf8[2] = (char)(0x80 | ( c & 0x3F));
len = 3;
}
- else {
+ else if (c < 0x200000) {
utf8[0] = (char)(0xF0 | (c >> 18) );
utf8[1] = (char)(0x80 | ((c >> 12) & 0x3F));
utf8[2] = (char)(0x80 | ((c >> 6) & 0x3F));
utf8[3] = (char)(0x80 | ( c & 0x3F));
len = 4;
}
-
+ else {
+ range_error:
+ mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range");
+ }
+
str = str_len_ensure(mrb, str, sidx + len);
memcpy(RSTRING_PTR(str) + sidx, utf8, len);
-
+
return len;
}
+ static const unsigned long utf8_limits[] = { /*
+  * utf8_limits[len - 1] is the smallest value that actually needs a
+  * len-byte sequence.  utf8_to_uv() raises "redundant UTF-8 sequence"
+  * when a decoded value is below the entry for its length.
+  */
+ 0x0, /* 1 byte */
+ 0x80, /* 2 bytes */
+ 0x800, /* 3 bytes */
+ 0x10000, /* 4 bytes */
+ 0x200000, /* 5 bytes */
+ 0x4000000, /* 6 bytes */
+ 0x80000000, /* 7 bytes; utf8_to_uv() accepts at most 6, so it never indexes this */
+ };
+ /*
+  * Decode the single UTF-8 sequence that starts at p.
+  *
+  * lenp is in/out: on entry it holds the number of bytes available at p,
+  * on return the number of bytes the decoded sequence occupies.
+  * Returns the decoded value.  Lead bytes announcing up to 6 bytes are
+  * accepted, and nothing here rejects surrogates or values above
+  * 0x10FFFF.
+  *
+  * Raises E_ARGUMENT_ERROR when the first byte is a continuation byte or
+  * 0xFE/0xFF, when fewer bytes are available than the lead byte
+  * announces, when a following byte is not of the form 10xxxxxx, or when
+  * the value is below utf8_limits[] for its length.
+  */
+ static unsigned long
+ utf8_to_uv(mrb_state *mrb, const char *p, long *lenp)
+ {
+ int c = *p++ & 0xff; /* & 0xff keeps the byte in 0..255 even where char is signed */
+ unsigned long uv = c;
+ long n;
+ /* 0xxxxxxx: a single ASCII byte is its own value */
+ if (!(uv & 0x80)) {
+ *lenp = 1;
+ return uv;
+ }
+ if (!(uv & 0x40)) { /* 10xxxxxx: a continuation byte cannot start a sequence */
+ *lenp = 1; /* NOTE(review): presumably unobservable because mrb_raise does not return -- confirm */
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character");
+ }
+ /* the first clear bit below 0x40 gives the sequence length n; keep only the lead byte's payload bits */
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
+ else { /* 0xFE or 0xFF */
+ *lenp = 1;
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character");
+ }
+ if (n > *lenp) { /* fewer bytes available than the lead byte announces */
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character (expected %S bytes, given %S bytes)",
+ mrb_fixnum_value(n), mrb_fixnum_value(*lenp));
+ }
+ *lenp = n--; /* report the full length; n becomes the number of continuation bytes */
+ if (n != 0) { /* n is at least 1 here, since n was 2..6 before the decrement */
+ while (n--) {
+ c = *p++ & 0xff;
+ if ((c & 0xc0) != 0x80) { /* not 10xxxxxx */
+ *lenp -= n + 1; /* leaves the count of bytes accepted before this one */
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character");
+ }
+ else {
+ c &= 0x3f;
+ uv = uv << 6 | c; /* append the 6 payload bits */
+ }
+ }
+ }
+ n = *lenp - 1; /* index of this sequence length in utf8_limits[] */
+ if (uv < utf8_limits[n]) { /* a shorter sequence could have encoded this value */
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "redundant UTF-8 sequence");
+ }
+ return uv;
+ }
+ /*
+  * Unpack one UTF-8 character from src and append its value to ary as a
+  * fixnum.
+  *
+  * srclen is the number of bytes available at src.  Returns the byte
+  * count reported by utf8_to_uv(); when srclen is 0 nothing is pushed
+  * and 1 is returned.  flags is not used.  Malformed input raises from
+  * within utf8_to_uv().
+  */
+ static int
+ unpack_utf8(mrb_state *mrb, const unsigned char * src, int srclen, mrb_value ary, unsigned int flags)
+ {
+ unsigned long uv;
+ long lenp = srclen; /* in: bytes available; out: bytes used by the sequence */
+
+ if (srclen == 0) {
+ return 1; /* NOTE(review): presumably non-zero so the caller's srcidx keeps advancing -- confirm */
+ }
+ uv = utf8_to_uv(mrb, (const char *)src, &lenp);
+ mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)uv)); /* NOTE(review): cast narrows if mrb_int is smaller than unsigned long -- confirm range */
+ return (int)lenp;
+ }
+
static int
pack_a(mrb_state *mrb, mrb_value src, mrb_value dst, mrb_int didx, long count, unsigned int flags)
{
while (padlen-- > 0) {
*dptr++ = pad;
}
-
+
return dptr - dptr0;
}
@@@ -541,7 -623,7 +623,7 @@@ pack_h(mrb_state *mrb, mrb_value src, m
} else if (slen > count) {
slen = count;
}
-
+
dst = str_len_ensure(mrb, dst, didx + count);
dptr = RSTRING_PTR(dst) + didx;
@@@ -1018,7 -1100,11 +1100,7 @@@ mrb_pack_pack(mrb_state *mrb, mrb_valu
o = mrb_ary_ref(mrb, ary, aidx);
if (type == PACK_TYPE_INTEGER) {
- if (mrb_float_p(o)) {
- o = mrb_funcall(mrb, o, "to_i", 0);
- } else if (!mrb_fixnum_p(o)) {
- mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S into Integer", mrb_class_path(mrb, mrb_obj_class(mrb, o)));
- }
+ o = mrb_to_int(mrb, o);
} else if (type == PACK_TYPE_FLOAT) {
if (!mrb_float_p(o)) {
o = mrb_funcall(mrb, o, "to_f", 0);
@@@ -1147,6 -1233,11 +1229,11 @@@ mrb_pack_unpack(mrb_state *mrb, mrb_val
case PACK_DIR_DOUBLE:
srcidx += unpack_double(mrb, sptr, srclen - srcidx, result, flags);
break;
+ case PACK_DIR_UTF8:
+ srcidx += unpack_utf8(mrb, sptr, srclen - srcidx, result, flags);
+ break;
+ default:
+ mrb_raise(mrb, E_RUNTIME_ERROR, "mruby-pack's bug");
}
if (count > 0) {
count--;