summaryrefslogtreecommitdiff
path: root/spec/ruby/core/string
diff options
context:
space:
mode:
Diffstat (limited to 'spec/ruby/core/string')
-rw-r--r--spec/ruby/core/string/byteslice_spec.rb6
-rw-r--r--spec/ruby/core/string/capitalize_spec.rb4
-rw-r--r--spec/ruby/core/string/chars_spec.rb7
-rw-r--r--spec/ruby/core/string/chomp_spec.rb4
-rw-r--r--spec/ruby/core/string/chop_spec.rb4
-rw-r--r--spec/ruby/core/string/clone_spec.rb4
-rw-r--r--spec/ruby/core/string/delete_prefix_spec.rb4
-rw-r--r--spec/ruby/core/string/delete_spec.rb4
-rw-r--r--spec/ruby/core/string/delete_suffix_spec.rb4
-rw-r--r--spec/ruby/core/string/downcase_spec.rb4
-rw-r--r--spec/ruby/core/string/dump_spec.rb10
-rw-r--r--spec/ruby/core/string/dup_spec.rb4
-rw-r--r--spec/ruby/core/string/lines_spec.rb1
-rw-r--r--spec/ruby/core/string/reverse_spec.rb4
-rw-r--r--spec/ruby/core/string/scan_spec.rb6
-rw-r--r--spec/ruby/core/string/scrub_spec.rb10
-rw-r--r--spec/ruby/core/string/shared/each_line.rb6
-rw-r--r--spec/ruby/core/string/shared/partition.rb15
-rw-r--r--spec/ruby/core/string/shared/slice.rb13
-rw-r--r--spec/ruby/core/string/shared/strip.rb4
-rw-r--r--spec/ruby/core/string/shared/succ.rb4
-rw-r--r--spec/ruby/core/string/split_spec.rb17
-rw-r--r--spec/ruby/core/string/squeeze_spec.rb5
-rw-r--r--spec/ruby/core/string/swapcase_spec.rb4
-rw-r--r--spec/ruby/core/string/undump_spec.rb2
-rw-r--r--spec/ruby/core/string/unpack/b_spec.rb28
-rw-r--r--spec/ruby/core/string/unpack/c_spec.rb14
-rw-r--r--spec/ruby/core/string/unpack/h_spec.rb28
-rw-r--r--spec/ruby/core/string/unpack/shared/basic.rb28
-rw-r--r--spec/ruby/core/string/unpack/shared/float.rb60
-rw-r--r--spec/ruby/core/string/unpack/shared/integer.rb88
-rw-r--r--spec/ruby/core/string/unpack/shared/unicode.rb14
-rw-r--r--spec/ruby/core/string/unpack/w_spec.rb14
-rw-r--r--spec/ruby/core/string/unpack1_spec.rb12
-rw-r--r--spec/ruby/core/string/unpack_spec.rb34
-rw-r--r--spec/ruby/core/string/upcase_spec.rb4
-rw-r--r--spec/ruby/core/string/valid_encoding/utf_8_spec.rb214
37 files changed, 610 insertions, 78 deletions
diff --git a/spec/ruby/core/string/byteslice_spec.rb b/spec/ruby/core/string/byteslice_spec.rb
index a49da040eb..312229523d 100644
--- a/spec/ruby/core/string/byteslice_spec.rb
+++ b/spec/ruby/core/string/byteslice_spec.rb
@@ -24,4 +24,10 @@ describe "String#byteslice on on non ASCII strings" do
"\u3042".byteslice(1..2).should == "\x81\x82".force_encoding("UTF-8")
"\u3042".byteslice(-1).should == "\x82".force_encoding("UTF-8")
end
+
+ it "returns a String in the same encoding as self" do
+ "ruby".encode("UTF-8").slice(0).encoding.should == Encoding::UTF_8
+ "ruby".encode("US-ASCII").slice(0).encoding.should == Encoding::US_ASCII
+ "ruby".encode("Windows-1251").slice(0).encoding.should == Encoding::Windows_1251
+ end
end
diff --git a/spec/ruby/core/string/capitalize_spec.rb b/spec/ruby/core/string/capitalize_spec.rb
index 751f4160a6..3f85cf5ae4 100644
--- a/spec/ruby/core/string/capitalize_spec.rb
+++ b/spec/ruby/core/string/capitalize_spec.rb
@@ -91,6 +91,10 @@ describe "String#capitalize" do
StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(String)
end
end
+
+ it "returns a String in the same encoding as self" do
+ "h".encode("US-ASCII").capitalize.encoding.should == Encoding::US_ASCII
+ end
end
describe "String#capitalize!" do
diff --git a/spec/ruby/core/string/chars_spec.rb b/spec/ruby/core/string/chars_spec.rb
index e4f26bc0cc..715e65dc90 100644
--- a/spec/ruby/core/string/chars_spec.rb
+++ b/spec/ruby/core/string/chars_spec.rb
@@ -1,5 +1,4 @@
require_relative 'shared/chars'
-require_relative 'shared/each_char_without_block'
describe "String#chars" do
it_behaves_like :string_chars, :chars
@@ -7,4 +6,10 @@ describe "String#chars" do
it "returns an array when no block given" do
"hello".chars.should == ['h', 'e', 'l', 'l', 'o']
end
+
+ it "returns Strings in the same encoding as self" do
+ "hello".encode("US-ASCII").chars.each do |c|
+ c.encoding.should == Encoding::US_ASCII
+ end
+ end
end
diff --git a/spec/ruby/core/string/chomp_spec.rb b/spec/ruby/core/string/chomp_spec.rb
index c03bfc7951..d0508d938f 100644
--- a/spec/ruby/core/string/chomp_spec.rb
+++ b/spec/ruby/core/string/chomp_spec.rb
@@ -40,6 +40,10 @@ describe "String#chomp" do
"".chomp.should == ""
end
+ it "returns a String in the same encoding as self" do
+ "abc\n\n".encode("US-ASCII").chomp.encoding.should == Encoding::US_ASCII
+ end
+
ruby_version_is ''...'3.0' do
it "returns subclass instances when called on a subclass" do
str = StringSpecs::MyString.new("hello\n").chomp
diff --git a/spec/ruby/core/string/chop_spec.rb b/spec/ruby/core/string/chop_spec.rb
index 266d973f67..f598d34bc8 100644
--- a/spec/ruby/core/string/chop_spec.rb
+++ b/spec/ruby/core/string/chop_spec.rb
@@ -60,6 +60,10 @@ describe "String#chop" do
StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(String)
end
end
+
+ it "returns a String in the same encoding as self" do
+ "abc\n\n".encode("US-ASCII").chop.encoding.should == Encoding::US_ASCII
+ end
end
describe "String#chop!" do
diff --git a/spec/ruby/core/string/clone_spec.rb b/spec/ruby/core/string/clone_spec.rb
index f8d40423f0..a2ba2f9877 100644
--- a/spec/ruby/core/string/clone_spec.rb
+++ b/spec/ruby/core/string/clone_spec.rb
@@ -54,4 +54,8 @@ describe "String#clone" do
orig.should == "xtring"
clone.should == "string"
end
+
+ it "returns a String in the same encoding as self" do
+ "a".encode("US-ASCII").clone.encoding.should == Encoding::US_ASCII
+ end
end
diff --git a/spec/ruby/core/string/delete_prefix_spec.rb b/spec/ruby/core/string/delete_prefix_spec.rb
index 17ce18bcca..238de85f05 100644
--- a/spec/ruby/core/string/delete_prefix_spec.rb
+++ b/spec/ruby/core/string/delete_prefix_spec.rb
@@ -51,6 +51,10 @@ describe "String#delete_prefix" do
s.delete_prefix('hell').should be_an_instance_of(String)
end
end
+
+ it "returns a String in the same encoding as self" do
+ 'hello'.encode("US-ASCII").delete_prefix('hell').encoding.should == Encoding::US_ASCII
+ end
end
describe "String#delete_prefix!" do
diff --git a/spec/ruby/core/string/delete_spec.rb b/spec/ruby/core/string/delete_spec.rb
index b91e88b76f..87831a9d19 100644
--- a/spec/ruby/core/string/delete_spec.rb
+++ b/spec/ruby/core/string/delete_spec.rb
@@ -95,6 +95,10 @@ describe "String#delete" do
StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(String)
end
end
+
+ it "returns a String in the same encoding as self" do
+ "hello".encode("US-ASCII").delete("lo").encoding.should == Encoding::US_ASCII
+ end
end
describe "String#delete!" do
diff --git a/spec/ruby/core/string/delete_suffix_spec.rb b/spec/ruby/core/string/delete_suffix_spec.rb
index 0705c73246..6883d6938c 100644
--- a/spec/ruby/core/string/delete_suffix_spec.rb
+++ b/spec/ruby/core/string/delete_suffix_spec.rb
@@ -51,6 +51,10 @@ describe "String#delete_suffix" do
s.delete_suffix('ello').should be_an_instance_of(String)
end
end
+
+ it "returns a String in the same encoding as self" do
+ "hello".encode("US-ASCII").delete_suffix("ello").encoding.should == Encoding::US_ASCII
+ end
end
describe "String#delete_suffix!" do
diff --git a/spec/ruby/core/string/downcase_spec.rb b/spec/ruby/core/string/downcase_spec.rb
index f0a15f1e25..153b4ce191 100644
--- a/spec/ruby/core/string/downcase_spec.rb
+++ b/spec/ruby/core/string/downcase_spec.rb
@@ -8,6 +8,10 @@ describe "String#downcase" do
"hello".downcase.should == "hello"
end
+ it "returns a String in the same encoding as self" do
+ "hELLO".encode("US-ASCII").downcase.encoding.should == Encoding::US_ASCII
+ end
+
describe "full Unicode case mapping" do
it "works for all of Unicode with no option" do
"ÄÖÜ".downcase.should == "äöü"
diff --git a/spec/ruby/core/string/dump_spec.rb b/spec/ruby/core/string/dump_spec.rb
index 79a8b55e6d..81de0cfae4 100644
--- a/spec/ruby/core/string/dump_spec.rb
+++ b/spec/ruby/core/string/dump_spec.rb
@@ -350,7 +350,7 @@ describe "String#dump" do
].should be_computed_by(:dump)
end
- it "returns a string with multi-byte UTF-8 characters replaced by \\u{} notation with upper-case hex digits" do
+ it "returns a string with multi-byte UTF-8 characters less than or equal 0xFFFF replaced by \\uXXXX notation with upper-case hex digits" do
[ [0200.chr('utf-8'), '"\u0080"'],
[0201.chr('utf-8'), '"\u0081"'],
[0202.chr('utf-8'), '"\u0082"'],
@@ -382,15 +382,21 @@ describe "String#dump" do
[0235.chr('utf-8'), '"\u009D"'],
[0236.chr('utf-8'), '"\u009E"'],
[0237.chr('utf-8'), '"\u009F"'],
+ [0177777.chr('utf-8'), '"\uFFFF"'],
].should be_computed_by(:dump)
end
+ it "returns a string with multi-byte UTF-8 characters greater than 0xFFFF replaced by \\u{XXXXXX} notation with upper-case hex digits" do
+ 0x10000.chr('utf-8').dump.should == '"\u{10000}"'
+ 0x10FFFF.chr('utf-8').dump.should == '"\u{10FFFF}"'
+ end
+
it "includes .force_encoding(name) if the encoding isn't ASCII compatible" do
"\u{876}".encode('utf-16be').dump.should.end_with?(".force_encoding(\"UTF-16BE\")")
"\u{876}".encode('utf-16le').dump.should.end_with?(".force_encoding(\"UTF-16LE\")")
end
- it "keeps origin encoding" do
+ it "returns a String in the same encoding as self" do
"foo".encode("ISO-8859-1").dump.encoding.should == Encoding::ISO_8859_1
"foo".encode('windows-1251').dump.encoding.should == Encoding::Windows_1251
1.chr.dump.encoding.should == Encoding::US_ASCII
diff --git a/spec/ruby/core/string/dup_spec.rb b/spec/ruby/core/string/dup_spec.rb
index eec3cf0a70..73f71b8ffc 100644
--- a/spec/ruby/core/string/dup_spec.rb
+++ b/spec/ruby/core/string/dup_spec.rb
@@ -58,4 +58,8 @@ describe "String#dup" do
orig.should == "c"
copy.should == "b"
end
+
+ it "returns a String in the same encoding as self" do
+ "hello".encode("US-ASCII").dup.encoding.should == Encoding::US_ASCII
+ end
end
diff --git a/spec/ruby/core/string/lines_spec.rb b/spec/ruby/core/string/lines_spec.rb
index ad4b119074..40ab5f71d8 100644
--- a/spec/ruby/core/string/lines_spec.rb
+++ b/spec/ruby/core/string/lines_spec.rb
@@ -1,7 +1,6 @@
require_relative '../../spec_helper'
require_relative 'fixtures/classes'
require_relative 'shared/each_line'
-require_relative 'shared/each_line_without_block'
describe "String#lines" do
it_behaves_like :string_each_line, :lines
diff --git a/spec/ruby/core/string/reverse_spec.rb b/spec/ruby/core/string/reverse_spec.rb
index 4206b8af90..73526256ef 100644
--- a/spec/ruby/core/string/reverse_spec.rb
+++ b/spec/ruby/core/string/reverse_spec.rb
@@ -37,6 +37,10 @@ describe "String#reverse" do
str.reverse.should == "體黑正\xDE\xDF軟微"
end
+
+ it "returns a String in the same encoding as self" do
+ "stressed".encode("US-ASCII").reverse.encoding.should == Encoding::US_ASCII
+ end
end
describe "String#reverse!" do
diff --git a/spec/ruby/core/string/scan_spec.rb b/spec/ruby/core/string/scan_spec.rb
index ab73f5747b..a2d1815132 100644
--- a/spec/ruby/core/string/scan_spec.rb
+++ b/spec/ruby/core/string/scan_spec.rb
@@ -69,6 +69,12 @@ describe "String#scan" do
it "does not raise any errors when passed a multi-byte string" do
"あああaaaあああ".scan("あああ").should == ["あああ", "あああ"]
end
+
+ it "returns Strings in the same encoding as self" do
+ "cruel world".encode("US-ASCII").scan(/\w+/).each do |s|
+ s.encoding.should == Encoding::US_ASCII
+ end
+ end
end
describe "String#scan with pattern and block" do
diff --git a/spec/ruby/core/string/scrub_spec.rb b/spec/ruby/core/string/scrub_spec.rb
index 66755bcc7b..a51fbd020a 100644
--- a/spec/ruby/core/string/scrub_spec.rb
+++ b/spec/ruby/core/string/scrub_spec.rb
@@ -31,6 +31,11 @@ describe "String#scrub with a default replacement" do
input.scrub.should == "abc?????"
end
+ it "returns a String in the same encoding as self" do
+ x81 = [0x81].pack('C').force_encoding('utf-8')
+ "abc\u3042#{x81}".scrub.encoding.should == Encoding::UTF_8
+ end
+
ruby_version_is '3.0' do
it "returns String instances when called on a subclass" do
StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String)
@@ -80,6 +85,11 @@ describe "String#scrub with a custom replacement" do
block.should raise_error(ArgumentError)
end
+ it "returns a String in the same encoding as self" do
+ x81 = [0x81].pack('C').force_encoding('utf-8')
+ "abc\u3042#{x81}".scrub("*").encoding.should == Encoding::UTF_8
+ end
+
it "raises TypeError when a non String replacement is given" do
x81 = [0x81].pack('C').force_encoding('utf-8')
block = -> { "foo#{x81}".scrub(1) }
diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb
index bfedf8f35a..df78bd2186 100644
--- a/spec/ruby/core/string/shared/each_line.rb
+++ b/spec/ruby/core/string/shared/each_line.rb
@@ -122,6 +122,12 @@ describe :string_each_line, shared: true do
out.should == ["hello\n", "world."]
end
+ it "returns Strings in the same encoding as self" do
+ "one\ntwo\r\nthree".encode("US-ASCII").send(@method) do |s|
+ s.encoding.should == Encoding::US_ASCII
+ end
+ end
+
it "raises a TypeError when the separator can't be converted to a string" do
-> { "hello world".send(@method, false) {} }.should raise_error(TypeError)
-> { "hello world".send(@method, mock('x')) {} }.should raise_error(TypeError)
diff --git a/spec/ruby/core/string/shared/partition.rb b/spec/ruby/core/string/shared/partition.rb
index 7dc3d9cc0b..41b3c7e0c9 100644
--- a/spec/ruby/core/string/shared/partition.rb
+++ b/spec/ruby/core/string/shared/partition.rb
@@ -33,4 +33,19 @@ describe :string_partition, shared: true do
end
end
end
+
+ it "returns before- and after- parts in the same encoding as self" do
+ strings = "hello".encode("US-ASCII").send(@method, "ello")
+ strings[0].encoding.should == Encoding::US_ASCII
+ strings[2].encoding.should == Encoding::US_ASCII
+
+ strings = "hello".encode("US-ASCII").send(@method, /ello/)
+ strings[0].encoding.should == Encoding::US_ASCII
+ strings[2].encoding.should == Encoding::US_ASCII
+ end
+
+ it "returns the matching part in the separator's encoding" do
+ strings = "hello".encode("US-ASCII").send(@method, "ello")
+ strings[1].encoding.should == Encoding::UTF_8
+ end
end
diff --git a/spec/ruby/core/string/shared/slice.rb b/spec/ruby/core/string/shared/slice.rb
index 713234fffd..a7c1d05b56 100644
--- a/spec/ruby/core/string/shared/slice.rb
+++ b/spec/ruby/core/string/shared/slice.rb
@@ -80,7 +80,7 @@ describe :string_slice_index_length, shared: true do
"hello there".send(@method, -3,2).should == "er"
end
- it "returns a string with the same encoding" do
+ it "returns a string with the same encoding as self" do
s = "hello there"
s.send(@method, 1, 9).encoding.should == s.encoding
@@ -206,6 +206,10 @@ describe :string_slice_range, shared: true do
"x".send(@method, 1..-1).should == ""
end
+ it "returns a String in the same encoding as self" do
+ "hello there".encode("US-ASCII").send(@method, 1..1).encoding.should == Encoding::US_ASCII
+ end
+
it "returns nil if the beginning of the range falls outside of self" do
"hello there".send(@method, 12..-1).should == nil
"hello there".send(@method, 20..25).should == nil
@@ -328,7 +332,8 @@ describe :string_slice_regexp, shared: true do
"hello there".send(@method, /xyz/).should == nil
end
- not_supported_on :opal do
+ it "returns a String in the same encoding as self" do
+ "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/).encoding.should == Encoding::US_ASCII
end
ruby_version_is ''...'3.0' do
@@ -391,6 +396,10 @@ describe :string_slice_regexp_index, shared: true do
$~[1].should == nil
end
+ it "returns a String in the same encoding as self" do
+ "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/, 0).encoding.should == Encoding::US_ASCII
+ end
+
it "calls to_int on the given index" do
obj = mock('2')
obj.should_receive(:to_int).and_return(2)
diff --git a/spec/ruby/core/string/shared/strip.rb b/spec/ruby/core/string/shared/strip.rb
index 9c232b4694..0c0aae20f3 100644
--- a/spec/ruby/core/string/shared/strip.rb
+++ b/spec/ruby/core/string/shared/strip.rb
@@ -2,6 +2,10 @@ require_relative '../../../spec_helper'
require_relative '../fixtures/classes'
describe :string_strip, shared: true do
+ it "returns a String in the same encoding as self" do
+ " hello ".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII
+ end
+
ruby_version_is '3.0' do
it "returns String instances when called on a subclass" do
StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(String)
diff --git a/spec/ruby/core/string/shared/succ.rb b/spec/ruby/core/string/shared/succ.rb
index 66edf6dc82..3605fa99a2 100644
--- a/spec/ruby/core/string/shared/succ.rb
+++ b/spec/ruby/core/string/shared/succ.rb
@@ -74,6 +74,10 @@ describe :string_succ, shared: true do
StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(String)
end
end
+
+ it "returns a String in the same encoding as self" do
+ "z".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII
+ end
end
describe :string_succ_bang, shared: true do
diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb
index 0417486692..519c5d845d 100644
--- a/spec/ruby/core/string/split_spec.rb
+++ b/spec/ruby/core/string/split_spec.rb
@@ -246,6 +246,13 @@ describe "String#split with String" do
it "doesn't split on non-ascii whitespace" do
"a\u{2008}b".split(" ").should == ["a\u{2008}b"]
end
+
+ it "returns Strings in the same encoding as self" do
+ strings = "hello world".encode("US-ASCII").split(" ")
+
+ strings[0].encoding.should == Encoding::US_ASCII
+ strings[1].encoding.should == Encoding::US_ASCII
+ end
end
describe "String#split with Regexp" do
@@ -443,13 +450,12 @@ describe "String#split with Regexp" do
end
end
- it "retains the encoding of the source string" do
+ it "returns Strings in the same encoding as self" do
ary = "а б в".split
encodings = ary.map { |s| s.encoding }
encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8]
end
-
it "splits a string on each character for a multibyte encoding and empty split" do
"That's why efficiency could not be helped".split("").size.should == 39
end
@@ -598,4 +604,11 @@ describe "String#split with Regexp" do
-> { "hello".split(false) }.should raise_error(TypeError)
-> { "hello".split(Object.new) }.should raise_error(TypeError)
end
+
+ it "returns Strings in the same encoding as self" do
+ strings = "hello world".encode("US-ASCII").split(/ /)
+
+ strings[0].encoding.should == Encoding::US_ASCII
+ strings[1].encoding.should == Encoding::US_ASCII
+ end
end
diff --git a/spec/ruby/core/string/squeeze_spec.rb b/spec/ruby/core/string/squeeze_spec.rb
index 5dc12a4247..2f3fa65745 100644
--- a/spec/ruby/core/string/squeeze_spec.rb
+++ b/spec/ruby/core/string/squeeze_spec.rb
@@ -64,6 +64,11 @@ describe "String#squeeze" do
"hello room".squeeze(other_string, other_string2).should == "hello rom"
end
+ it "returns a String in the same encoding as self" do
+ "yellow moon".encode("US-ASCII").squeeze.encoding.should == Encoding::US_ASCII
+ "yellow moon".encode("US-ASCII").squeeze("a").encoding.should == Encoding::US_ASCII
+ end
+
it "raises a TypeError when one set arg can't be converted to a string" do
-> { "hello world".squeeze([]) }.should raise_error(TypeError)
-> { "hello world".squeeze(Object.new)}.should raise_error(TypeError)
diff --git a/spec/ruby/core/string/swapcase_spec.rb b/spec/ruby/core/string/swapcase_spec.rb
index 6307a1eaaf..d369ab3e4e 100644
--- a/spec/ruby/core/string/swapcase_spec.rb
+++ b/spec/ruby/core/string/swapcase_spec.rb
@@ -9,6 +9,10 @@ describe "String#swapcase" do
"+++---111222???".swapcase.should == "+++---111222???"
end
+ it "returns a String in the same encoding as self" do
+ "Hello".encode("US-ASCII").swapcase.encoding.should == Encoding::US_ASCII
+ end
+
describe "full Unicode case mapping" do
it "works for all of Unicode with no option" do
"äÖü".swapcase.should == "ÄöÜ"
diff --git a/spec/ruby/core/string/undump_spec.rb b/spec/ruby/core/string/undump_spec.rb
index 08058d9bd1..6ff220161c 100644
--- a/spec/ruby/core/string/undump_spec.rb
+++ b/spec/ruby/core/string/undump_spec.rb
@@ -389,7 +389,7 @@ describe "String#undump" do
'"\\bv".force_encoding("UTF-16BE")'.undump.should == "\u0876".encode('utf-16be')
end
- it "keeps origin encoding" do
+ it "returns a String in the same encoding as self" do
'"foo"'.encode("ISO-8859-1").undump.encoding.should == Encoding::ISO_8859_1
'"foo"'.encode('windows-1251').undump.encoding.should == Encoding::Windows_1251
end
diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb
index fcabc99731..2cf5ebad34 100644
--- a/spec/ruby/core/string/unpack/b_spec.rb
+++ b/spec/ruby/core/string/unpack/b_spec.rb
@@ -86,8 +86,18 @@ describe "String#unpack with format 'B'" do
].should be_computed_by(:unpack, "BBB")
end
- it "ignores NULL bytes between directives" do
- "\x80\x00".unpack("B\x00B").should == ["1", "0"]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "\x80\x00".unpack("B\x00B").should == ["1", "0"]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x80\x00".unpack("B\x00B")
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -182,8 +192,18 @@ describe "String#unpack with format 'b'" do
].should be_computed_by(:unpack, "bbb")
end
- it "ignores NULL bytes between directives" do
- "\x01\x00".unpack("b\x00b").should == ["1", "0"]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "\x01\x00".unpack("b\x00b").should == ["1", "0"]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x01\x00".unpack("b\x00b")
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb
index ed8caa4895..dbcbacc74d 100644
--- a/spec/ruby/core/string/unpack/c_spec.rb
+++ b/spec/ruby/core/string/unpack/c_spec.rb
@@ -35,8 +35,18 @@ describe :string_unpack_8bit, shared: true do
].should be_computed_by(:unpack, unpack_format(3))
end
- it "ignores NULL bytes between directives" do
- "abc".unpack(unpack_format("\000", 2)).should == [97, 98]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "abc".unpack(unpack_format("\000", 2)).should == [97, 98]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "abc".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb
index f2f5dcf396..ee08d20926 100644
--- a/spec/ruby/core/string/unpack/h_spec.rb
+++ b/spec/ruby/core/string/unpack/h_spec.rb
@@ -56,8 +56,18 @@ describe "String#unpack with format 'H'" do
].should be_computed_by(:unpack, "HHH")
end
- it "ignores NULL bytes between directives" do
- "\x01\x10".unpack("H\x00H").should == ["0", "1"]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "\x01\x10".unpack("H\x00H").should == ["0", "1"]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x01\x10".unpack("H\x00H")
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -121,8 +131,18 @@ describe "String#unpack with format 'h'" do
].should be_computed_by(:unpack, "hhh")
end
- it "ignores NULL bytes between directives" do
- "\x01\x10".unpack("h\x00h").should == ["1", "0"]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "\x01\x10".unpack("h\x00h").should == ["1", "0"]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x01\x10".unpack("h\x00h")
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb
index f636f4689f..bb5302edc5 100644
--- a/spec/ruby/core/string/unpack/shared/basic.rb
+++ b/spec/ruby/core/string/unpack/shared/basic.rb
@@ -8,20 +8,6 @@ describe :string_unpack_basic, shared: true do
d.should_receive(:to_str).and_return("a"+unpack_format)
"abc".unpack(d).should be_an_instance_of(Array)
end
-
- it "raises a TypeError when passed nil" do
- -> { "abc".unpack(nil) }.should raise_error(TypeError)
- end
-
- it "raises a TypeError when passed an Integer" do
- -> { "abc".unpack(1) }.should raise_error(TypeError)
- end
-
- ruby_version_is "3.1" do
- it "starts unpacking from the given offset" do
- "abc".unpack("CC", offset: 1).should == [98, 99]
- end
- end
end
describe :string_unpack_no_platform, shared: true do
@@ -32,18 +18,4 @@ describe :string_unpack_no_platform, shared: true do
it "raises an ArgumentError when the format modifier is '!'" do
-> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
end
-
- ruby_version_is "3.1" do
- it "raises an ArgumentError when the offset is negative" do
- -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError)
- end
-
- it "returns nil if the offset is at the end of the string" do
- "a".unpack("C", offset: 1).should == [nil]
- end
-
- it "raises an ArgumentError when the offset is larget than the string" do
- -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError)
- end
- end
end
diff --git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb
index 99bd8a3401..ccddf94f99 100644
--- a/spec/ruby/core/string/unpack/shared/float.rb
+++ b/spec/ruby/core/string/unpack/shared/float.rb
@@ -56,9 +56,19 @@ describe :string_unpack_float_le, shared: true do
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
end
- it "ignores NULL bytes between directives" do
- array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2))
- array.should == [2.9000000953674316, 1.399999976158142]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2))
+ array.should == [2.9000000953674316, 1.399999976158142]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -123,9 +133,19 @@ describe :string_unpack_float_be, shared: true do
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
end
- it "ignores NULL bytes between directives" do
- array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2))
- array.should == [2.9000000953674316, 1.399999976158142]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2))
+ array.should == [2.9000000953674316, 1.399999976158142]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -193,8 +213,18 @@ describe :string_unpack_double_le, shared: true do
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
end
- it "ignores NULL bytes between directives" do
- "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -261,8 +291,18 @@ describe :string_unpack_double_be, shared: true do
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
end
- it "ignores NULL bytes between directives" do
- "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb
index cbaa743683..ba4f149dad 100644
--- a/spec/ruby/core/string/unpack/shared/integer.rb
+++ b/spec/ruby/core/string/unpack/shared/integer.rb
@@ -32,8 +32,18 @@ describe :string_unpack_16bit_le, shared: true do
].should be_computed_by(:unpack, unpack_format(3))
end
- it "ignores NULL bytes between directives" do
- "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "abcd".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -85,8 +95,18 @@ describe :string_unpack_16bit_be, shared: true do
].should be_computed_by(:unpack, unpack_format(3))
end
- it "ignores NULL bytes between directives" do
- "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "badc".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -139,8 +159,18 @@ describe :string_unpack_32bit_le, shared: true do
].should be_computed_by(:unpack, unpack_format(3))
end
- it "ignores NULL bytes between directives" do
- "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "abcdefgh".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -193,8 +223,18 @@ describe :string_unpack_32bit_be, shared: true do
].should be_computed_by(:unpack, unpack_format(3))
end
- it "ignores NULL bytes between directives" do
- "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "dcbahgfe".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -243,9 +283,19 @@ describe :string_unpack_64bit_le, shared: true do
"abc".unpack(unpack_format('*')).should == []
end
- it "ignores NULL bytes between directives" do
- array = "abcdefghabghefcd".unpack(unpack_format("\000", 2))
- array.should == [7523094288207667809, 7233738012216484449]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ array = "abcdefghabghefcd".unpack(unpack_format("\000", 2))
+ array.should == [7523094288207667809, 7233738012216484449]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "badc".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
@@ -305,9 +355,19 @@ describe :string_unpack_64bit_be, shared: true do
"abc".unpack(unpack_format('*')).should == []
end
- it "ignores NULL bytes between directives" do
- array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2))
- array.should == [7523094288207667809, 7233738012216484449]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2))
+ array.should == [7523094288207667809, 7233738012216484449]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "hgfedcbadcfehgba".unpack(unpack_format("\000", 2))
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb b/spec/ruby/core/string/unpack/shared/unicode.rb
index a2b4e142b2..ce1f29fe87 100644
--- a/spec/ruby/core/string/unpack/shared/unicode.rb
+++ b/spec/ruby/core/string/unpack/shared/unicode.rb
@@ -50,8 +50,18 @@ describe :string_unpack_unicode, shared: true do
"\xc2\x80".unpack("UUUU").should == [0x80]
end
- it "ignores NULL bytes between directives" do
- "\x01\x02".unpack("U\x00U").should == [1, 2]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "\x01\x02".unpack("U\x00U").should == [1, 2]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x01\x02".unpack("U\x00U")
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/w_spec.rb b/spec/ruby/core/string/unpack/w_spec.rb
index 011c75f5c4..b213b32921 100644
--- a/spec/ruby/core/string/unpack/w_spec.rb
+++ b/spec/ruby/core/string/unpack/w_spec.rb
@@ -15,8 +15,18 @@ describe "String#unpack with directive 'w'" do
].should be_computed_by(:unpack, "w")
end
- it "ignores NULL bytes between directives" do
- "\x01\x02\x03".unpack("w\x00w").should == [1, 2]
+ ruby_version_is ""..."3.3" do
+ it "ignores NULL bytes between directives" do
+ "\x01\x02\x03".unpack("w\x00w").should == [1, 2]
+ end
+ end
+
+ ruby_version_is "3.3" do
+ it "raise ArgumentError for NULL bytes between directives" do
+ -> {
+ "\x01\x02\x03".unpack("w\x00w")
+ }.should raise_error(ArgumentError, /unknown unpack directive/)
+ end
end
it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb
index f59bd92d6a..df830916a3 100644
--- a/spec/ruby/core/string/unpack1_spec.rb
+++ b/spec/ruby/core/string/unpack1_spec.rb
@@ -15,16 +15,22 @@ describe "String#unpack1" do
"ZA".unpack1("B*", offset: 1).should == "01000001"
end
+ it "traits offset as a bytes offset" do
+ "؈".unpack("CC").should == [216, 136]
+ "؈".unpack1("C").should == 216
+ "؈".unpack1("C", offset: 1).should == 136
+ end
+
it "raises an ArgumentError when the offset is negative" do
- -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError)
+ -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative")
end
it "returns nil if the offset is at the end of the string" do
"a".unpack1("C", offset: 1).should == nil
end
- it "raises an ArgumentError when the offset is larget than the string" do
- -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError)
+ it "raises an ArgumentError when the offset is larger than the string bytesize" do
+ -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string")
end
end
end
diff --git a/spec/ruby/core/string/unpack_spec.rb b/spec/ruby/core/string/unpack_spec.rb
new file mode 100644
index 0000000000..4ff7d07460
--- /dev/null
+++ b/spec/ruby/core/string/unpack_spec.rb
@@ -0,0 +1,34 @@
+require_relative '../../spec_helper'
+
+describe "String#unpack" do
+ it "raises a TypeError when passed nil" do
+ -> { "abc".unpack(nil) }.should raise_error(TypeError)
+ end
+
+ it "raises a TypeError when passed an Integer" do
+ -> { "abc".unpack(1) }.should raise_error(TypeError)
+ end
+
+ ruby_version_is "3.1" do
+ it "starts unpacking from the given offset" do
+ "abc".unpack("CC", offset: 1).should == [98, 99]
+ end
+
+ it "traits offset as a bytes offset" do
+ "؈".unpack("CC").should == [216, 136]
+ "؈".unpack("CC", offset: 1).should == [136, nil]
+ end
+
+ it "raises an ArgumentError when the offset is negative" do
+ -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative")
+ end
+
+ it "returns nil if the offset is at the end of the string" do
+ "a".unpack("C", offset: 1).should == [nil]
+ end
+
+ it "raises an ArgumentError when the offset is larget than the string" do
+ -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string")
+ end
+ end
+end \ No newline at end of file
diff --git a/spec/ruby/core/string/upcase_spec.rb b/spec/ruby/core/string/upcase_spec.rb
index 209fe73b6e..5ce7b0b95f 100644
--- a/spec/ruby/core/string/upcase_spec.rb
+++ b/spec/ruby/core/string/upcase_spec.rb
@@ -8,6 +8,10 @@ describe "String#upcase" do
"hello".upcase.should == "HELLO"
end
+ it "returns a String in the same encoding as self" do
+ "hello".encode("US-ASCII").upcase.encoding.should == Encoding::US_ASCII
+ end
+
describe "full Unicode case mapping" do
it "works for all of Unicode with no option" do
"äöü".upcase.should == "ÄÖÜ"
diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
new file mode 100644
index 0000000000..a14c3af830
--- /dev/null
+++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
@@ -0,0 +1,214 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../../spec_helper'
+
+describe "String#valid_encoding? and UTF-8" do
+ def utf8(bytes)
+ bytes.pack("C*").force_encoding("UTF-8")
+ end
+
+ describe "1-byte character" do
+ it "is valid if is in format 0xxxxxxx" do
+ utf8([0b00000000]).valid_encoding?.should == true
+ utf8([0b01111111]).valid_encoding?.should == true
+ end
+
+ it "is not valid if is not in format 0xxxxxxx" do
+ utf8([0b10000000]).valid_encoding?.should == false
+ utf8([0b11111111]).valid_encoding?.should == false
+ end
+ end
+
+ describe "2-bytes character" do
+ it "is valid if in format [110xxxxx 10xxxxx]" do
+ utf8([0b11000010, 0b10000000]).valid_encoding?.should == true
+ utf8([0b11000010, 0b10111111]).valid_encoding?.should == true
+
+ utf8([0b11011111, 0b10000000]).valid_encoding?.should == true
+ utf8([0b11011111, 0b10111111]).valid_encoding?.should == true
+ end
+
+ it "is not valid if the first byte is not in format 110xxxxx" do
+ utf8([0b00000010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b00100010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01000010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01100010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10000010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10100010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes
+ utf8([0b11100010, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second byte is not in format 10xxxxxx" do
+ utf8([0b11000010, 0b00000000]).valid_encoding?.should == false
+ utf8([0b11000010, 0b01000000]).valid_encoding?.should == false
+ utf8([0b11000010, 0b11000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if is smaller than [xxxxxx10 xx000000] (codepoints < U+007F, that are encoded with the 1-byte format)" do
+ utf8([0b11000000, 0b10111111]).valid_encoding?.should == false
+ utf8([0b11000001, 0b10111111]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the first byte is missing" do
+ bytes = [0b11000010, 0b10000000]
+ utf8(bytes[1..1]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second byte is missing" do
+ bytes = [0b11000010, 0b10000000]
+ utf8(bytes[0..0]).valid_encoding?.should == false
+ end
+ end
+
+ describe "3-bytes character" do
+ it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do
+ utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true
+ utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true
+ utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+ utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+ end
+
+ it "is not valid if the first byte is not in format 1110xxxx" do
+ utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes
+ utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second byte is not in format 10xxxxxx" do
+ utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the third byte is not in format 10xxxxxx" do
+ utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints < U+07FF that are encoded with the 2-byte format)" do
+ utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10000001, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do
+ utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false
+ utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false
+
+ utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1
+ utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1
+ end
+
+ it "is not valid if the first byte is missing" do
+ bytes = [0b11100000, 0b10100000, 0b10000000]
+ utf8(bytes[2..3]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second byte is missing" do
+ bytes = [0b11100000, 0b10100000, 0b10000000]
+ utf8([bytes[0], bytes[2]]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second and the third bytes are missing" do
+ bytes = [0b11100000, 0b10100000, 0b10000000]
+ utf8(bytes[0..0]).valid_encoding?.should == false
+ end
+ end
+
+ describe "4-bytes character" do
+ it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true
+ utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+ utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+ utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+ end
+
+ it "is not valid if the first byte is not in format 11110xxx" do
+ utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second byte is not in format 10xxxxxx" do
+ utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+ utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the third byte is not in format 10xxxxxx" do
+ utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+ utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the forth byte is not in format 10xxxxxx" do
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+ utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if is smaller than [xxxxx000 xx001000 xx000000 xx000000] (codepoint < U+10000)" do
+ utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ end
+
+ it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do
+ utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+ utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+
+ utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+ utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+ utf8([0b11110111, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the first byte is missing" do
+ bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+ utf8(bytes[1..3]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second byte is missing" do
+ bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+ utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second and the third bytes are missing" do
+ bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+ utf8([bytes[0], bytes[3]]).valid_encoding?.should == false
+ end
+
+ it "is not valid if the second, the third and the fourth bytes are missing" do
+ bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+ utf8(bytes[0..0]).valid_encoding?.should == false
+ end
+ end
+end