summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/json/generator/generator.c65
-rw-r--r--ext/json/generator/generator.h1
2 files changed, 16 insertions, 50 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index 766e199f74..9ecbfbeaa0 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -66,42 +66,6 @@ static const char trailingBytesForUTF8[256] = {
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-/*
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
- * This must be called with the length pre-determined by the first byte.
- * If not calling this from ConvertUTF8to*, then the length can be set by:
- * length = trailingBytesForUTF8[*source]+1;
- * and the sequence is illegal right away if there aren't that many bytes
- * available.
- * If presented with a length > 4, this returns 0. The Unicode
- * definition of UTF-8 goes up to 4-byte sequences.
- */
-static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
-{
- UTF8 a;
- const UTF8 *srcptr = source+length;
- switch (length) {
- default: return 0;
- /* Everything else falls through when "1"... */
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
-
- switch (*source) {
- /* no fall-through in this inner switch */
- case 0xE0: if (a < 0xA0) return 0; break;
- case 0xED: if (a > 0x9F) return 0; break;
- case 0xF0: if (a < 0x90) return 0; break;
- case 0xF4: if (a > 0x8F) return 0; break;
- default: if (a < 0x80) return 0;
- }
-
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
- }
- if (*source > 0xF4) return 0;
- return 1;
-}
-
/* Escapes the UTF16 character and stores the result in the buffer buf. */
static void unicode_escape(char *buf, UTF16 character)
{
@@ -130,17 +94,18 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char scrip
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
char buf[6] = { '\\', 'u' };
- while (source < sourceEnd) {
- UTF32 ch = 0;
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "partial character in source, but hit end");
- }
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ int ascii_only = rb_enc_str_asciionly_p(string);
+
+ if (!ascii_only) {
+ if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
rb_raise(rb_path2class("JSON::GeneratorError"),
"source sequence is illegal/malformed utf-8");
}
+ }
+
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
/*
* The cases all fall through. See "Note A" below.
*/
@@ -238,6 +203,13 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe
char buf[6] = { '\\', 'u' };
int ascii_only = rb_enc_str_asciionly_p(string);
+ if (!ascii_only) {
+ if (RB_ENCODING_GET_INLINED(string) != rb_utf8_encindex() || RB_ENC_CODERANGE(string) != RUBY_ENC_CODERANGE_VALID) {
+ rb_raise(rb_path2class("JSON::GeneratorError"),
+ "source sequence is illegal/malformed utf-8");
+ }
+ }
+
for (start = 0, end = 0; end < len;) {
p = ptr + end;
c = (unsigned char) *p;
@@ -309,11 +281,6 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe
continue;
}
}
-
- if (!isLegalUTF8((UTF8 *) p, clen)) {
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
- }
}
end += clen;
}
diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h
index 98b7d833f3..8dde7be827 100644
--- a/ext/json/generator/generator.h
+++ b/ext/json/generator/generator.h
@@ -46,7 +46,6 @@ static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
-static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length);
static void unicode_escape(char *buf, UTF16 character);
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);