diff options
Diffstat (limited to 'spec/ruby/core/string')
37 files changed, 610 insertions, 78 deletions
diff --git a/spec/ruby/core/string/byteslice_spec.rb b/spec/ruby/core/string/byteslice_spec.rb index a49da040eb..312229523d 100644 --- a/spec/ruby/core/string/byteslice_spec.rb +++ b/spec/ruby/core/string/byteslice_spec.rb @@ -24,4 +24,10 @@ describe "String#byteslice on on non ASCII strings" do "\u3042".byteslice(1..2).should == "\x81\x82".force_encoding("UTF-8") "\u3042".byteslice(-1).should == "\x82".force_encoding("UTF-8") end + + it "returns a String in the same encoding as self" do + "ruby".encode("UTF-8").byteslice(0).encoding.should == Encoding::UTF_8 + "ruby".encode("US-ASCII").byteslice(0).encoding.should == Encoding::US_ASCII + "ruby".encode("Windows-1251").byteslice(0).encoding.should == Encoding::Windows_1251 + end end diff --git a/spec/ruby/core/string/capitalize_spec.rb b/spec/ruby/core/string/capitalize_spec.rb index 751f4160a6..3f85cf5ae4 100644 --- a/spec/ruby/core/string/capitalize_spec.rb +++ b/spec/ruby/core/string/capitalize_spec.rb @@ -91,6 +91,10 @@ describe "String#capitalize" do StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(String) end end + + it "returns a String in the same encoding as self" do + "h".encode("US-ASCII").capitalize.encoding.should == Encoding::US_ASCII + end end describe "String#capitalize!" 
do diff --git a/spec/ruby/core/string/chars_spec.rb b/spec/ruby/core/string/chars_spec.rb index e4f26bc0cc..715e65dc90 100644 --- a/spec/ruby/core/string/chars_spec.rb +++ b/spec/ruby/core/string/chars_spec.rb @@ -1,5 +1,4 @@ require_relative 'shared/chars' -require_relative 'shared/each_char_without_block' describe "String#chars" do it_behaves_like :string_chars, :chars @@ -7,4 +6,10 @@ describe "String#chars" do it "returns an array when no block given" do "hello".chars.should == ['h', 'e', 'l', 'l', 'o'] end + + it "returns Strings in the same encoding as self" do + "hello".encode("US-ASCII").chars.each do |c| + c.encoding.should == Encoding::US_ASCII + end + end end diff --git a/spec/ruby/core/string/chomp_spec.rb b/spec/ruby/core/string/chomp_spec.rb index c03bfc7951..d0508d938f 100644 --- a/spec/ruby/core/string/chomp_spec.rb +++ b/spec/ruby/core/string/chomp_spec.rb @@ -40,6 +40,10 @@ describe "String#chomp" do "".chomp.should == "" end + it "returns a String in the same encoding as self" do + "abc\n\n".encode("US-ASCII").chomp.encoding.should == Encoding::US_ASCII + end + ruby_version_is ''...'3.0' do it "returns subclass instances when called on a subclass" do str = StringSpecs::MyString.new("hello\n").chomp diff --git a/spec/ruby/core/string/chop_spec.rb b/spec/ruby/core/string/chop_spec.rb index 266d973f67..f598d34bc8 100644 --- a/spec/ruby/core/string/chop_spec.rb +++ b/spec/ruby/core/string/chop_spec.rb @@ -60,6 +60,10 @@ describe "String#chop" do StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(String) end end + + it "returns a String in the same encoding as self" do + "abc\n\n".encode("US-ASCII").chop.encoding.should == Encoding::US_ASCII + end end describe "String#chop!" 
do diff --git a/spec/ruby/core/string/clone_spec.rb b/spec/ruby/core/string/clone_spec.rb index f8d40423f0..a2ba2f9877 100644 --- a/spec/ruby/core/string/clone_spec.rb +++ b/spec/ruby/core/string/clone_spec.rb @@ -54,4 +54,8 @@ describe "String#clone" do orig.should == "xtring" clone.should == "string" end + + it "returns a String in the same encoding as self" do + "a".encode("US-ASCII").clone.encoding.should == Encoding::US_ASCII + end end diff --git a/spec/ruby/core/string/delete_prefix_spec.rb b/spec/ruby/core/string/delete_prefix_spec.rb index 17ce18bcca..238de85f05 100644 --- a/spec/ruby/core/string/delete_prefix_spec.rb +++ b/spec/ruby/core/string/delete_prefix_spec.rb @@ -51,6 +51,10 @@ describe "String#delete_prefix" do s.delete_prefix('hell').should be_an_instance_of(String) end end + + it "returns a String in the same encoding as self" do + 'hello'.encode("US-ASCII").delete_prefix('hell').encoding.should == Encoding::US_ASCII + end end describe "String#delete_prefix!" do diff --git a/spec/ruby/core/string/delete_spec.rb b/spec/ruby/core/string/delete_spec.rb index b91e88b76f..87831a9d19 100644 --- a/spec/ruby/core/string/delete_spec.rb +++ b/spec/ruby/core/string/delete_spec.rb @@ -95,6 +95,10 @@ describe "String#delete" do StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(String) end end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").delete("lo").encoding.should == Encoding::US_ASCII + end end describe "String#delete!" 
do diff --git a/spec/ruby/core/string/delete_suffix_spec.rb b/spec/ruby/core/string/delete_suffix_spec.rb index 0705c73246..6883d6938c 100644 --- a/spec/ruby/core/string/delete_suffix_spec.rb +++ b/spec/ruby/core/string/delete_suffix_spec.rb @@ -51,6 +51,10 @@ describe "String#delete_suffix" do s.delete_suffix('ello').should be_an_instance_of(String) end end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").delete_suffix("ello").encoding.should == Encoding::US_ASCII + end end describe "String#delete_suffix!" do diff --git a/spec/ruby/core/string/downcase_spec.rb b/spec/ruby/core/string/downcase_spec.rb index f0a15f1e25..153b4ce191 100644 --- a/spec/ruby/core/string/downcase_spec.rb +++ b/spec/ruby/core/string/downcase_spec.rb @@ -8,6 +8,10 @@ describe "String#downcase" do "hello".downcase.should == "hello" end + it "returns a String in the same encoding as self" do + "hELLO".encode("US-ASCII").downcase.encoding.should == Encoding::US_ASCII + end + describe "full Unicode case mapping" do it "works for all of Unicode with no option" do "ÄÖÜ".downcase.should == "äöü" diff --git a/spec/ruby/core/string/dump_spec.rb b/spec/ruby/core/string/dump_spec.rb index 79a8b55e6d..81de0cfae4 100644 --- a/spec/ruby/core/string/dump_spec.rb +++ b/spec/ruby/core/string/dump_spec.rb @@ -350,7 +350,7 @@ describe "String#dump" do ].should be_computed_by(:dump) end - it "returns a string with multi-byte UTF-8 characters replaced by \\u{} notation with upper-case hex digits" do + it "returns a string with multi-byte UTF-8 characters less than or equal 0xFFFF replaced by \\uXXXX notation with upper-case hex digits" do [ [0200.chr('utf-8'), '"\u0080"'], [0201.chr('utf-8'), '"\u0081"'], [0202.chr('utf-8'), '"\u0082"'], @@ -382,15 +382,21 @@ describe "String#dump" do [0235.chr('utf-8'), '"\u009D"'], [0236.chr('utf-8'), '"\u009E"'], [0237.chr('utf-8'), '"\u009F"'], + [0177777.chr('utf-8'), '"\uFFFF"'], ].should be_computed_by(:dump) end + it "returns a 
string with multi-byte UTF-8 characters greater than 0xFFFF replaced by \\u{XXXXXX} notation with upper-case hex digits" do + 0x10000.chr('utf-8').dump.should == '"\u{10000}"' + 0x10FFFF.chr('utf-8').dump.should == '"\u{10FFFF}"' + end + it "includes .force_encoding(name) if the encoding isn't ASCII compatible" do "\u{876}".encode('utf-16be').dump.should.end_with?(".force_encoding(\"UTF-16BE\")") "\u{876}".encode('utf-16le').dump.should.end_with?(".force_encoding(\"UTF-16LE\")") end - it "keeps origin encoding" do + it "returns a String in the same encoding as self" do "foo".encode("ISO-8859-1").dump.encoding.should == Encoding::ISO_8859_1 "foo".encode('windows-1251').dump.encoding.should == Encoding::Windows_1251 1.chr.dump.encoding.should == Encoding::US_ASCII diff --git a/spec/ruby/core/string/dup_spec.rb b/spec/ruby/core/string/dup_spec.rb index eec3cf0a70..73f71b8ffc 100644 --- a/spec/ruby/core/string/dup_spec.rb +++ b/spec/ruby/core/string/dup_spec.rb @@ -58,4 +58,8 @@ describe "String#dup" do orig.should == "c" copy.should == "b" end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").dup.encoding.should == Encoding::US_ASCII + end end diff --git a/spec/ruby/core/string/lines_spec.rb b/spec/ruby/core/string/lines_spec.rb index ad4b119074..40ab5f71d8 100644 --- a/spec/ruby/core/string/lines_spec.rb +++ b/spec/ruby/core/string/lines_spec.rb @@ -1,7 +1,6 @@ require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/each_line' -require_relative 'shared/each_line_without_block' describe "String#lines" do it_behaves_like :string_each_line, :lines diff --git a/spec/ruby/core/string/reverse_spec.rb b/spec/ruby/core/string/reverse_spec.rb index 4206b8af90..73526256ef 100644 --- a/spec/ruby/core/string/reverse_spec.rb +++ b/spec/ruby/core/string/reverse_spec.rb @@ -37,6 +37,10 @@ describe "String#reverse" do str.reverse.should == "體黑正\xDE\xDF軟微" end + + it "returns a String in the same encoding 
as self" do + "stressed".encode("US-ASCII").reverse.encoding.should == Encoding::US_ASCII + end end describe "String#reverse!" do diff --git a/spec/ruby/core/string/scan_spec.rb b/spec/ruby/core/string/scan_spec.rb index ab73f5747b..a2d1815132 100644 --- a/spec/ruby/core/string/scan_spec.rb +++ b/spec/ruby/core/string/scan_spec.rb @@ -69,6 +69,12 @@ describe "String#scan" do it "does not raise any errors when passed a multi-byte string" do "あああaaaあああ".scan("あああ").should == ["あああ", "あああ"] end + + it "returns Strings in the same encoding as self" do + "cruel world".encode("US-ASCII").scan(/\w+/).each do |s| + s.encoding.should == Encoding::US_ASCII + end + end end describe "String#scan with pattern and block" do diff --git a/spec/ruby/core/string/scrub_spec.rb b/spec/ruby/core/string/scrub_spec.rb index 66755bcc7b..a51fbd020a 100644 --- a/spec/ruby/core/string/scrub_spec.rb +++ b/spec/ruby/core/string/scrub_spec.rb @@ -31,6 +31,11 @@ describe "String#scrub with a default replacement" do input.scrub.should == "abc?????" 
end + it "returns a String in the same encoding as self" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub.encoding.should == Encoding::UTF_8 + end + ruby_version_is '3.0' do it "returns String instances when called on a subclass" do StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String) @@ -80,6 +85,11 @@ describe "String#scrub with a custom replacement" do block.should raise_error(ArgumentError) end + it "returns a String in the same encoding as self" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub("*").encoding.should == Encoding::UTF_8 + end + it "raises TypeError when a non String replacement is given" do x81 = [0x81].pack('C').force_encoding('utf-8') block = -> { "foo#{x81}".scrub(1) } diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb index bfedf8f35a..df78bd2186 100644 --- a/spec/ruby/core/string/shared/each_line.rb +++ b/spec/ruby/core/string/shared/each_line.rb @@ -122,6 +122,12 @@ describe :string_each_line, shared: true do out.should == ["hello\n", "world."] end + it "returns Strings in the same encoding as self" do + "one\ntwo\r\nthree".encode("US-ASCII").send(@method) do |s| + s.encoding.should == Encoding::US_ASCII + end + end + it "raises a TypeError when the separator can't be converted to a string" do -> { "hello world".send(@method, false) {} }.should raise_error(TypeError) -> { "hello world".send(@method, mock('x')) {} }.should raise_error(TypeError) diff --git a/spec/ruby/core/string/shared/partition.rb b/spec/ruby/core/string/shared/partition.rb index 7dc3d9cc0b..41b3c7e0c9 100644 --- a/spec/ruby/core/string/shared/partition.rb +++ b/spec/ruby/core/string/shared/partition.rb @@ -33,4 +33,19 @@ describe :string_partition, shared: true do end end end + + it "returns before- and after- parts in the same encoding as self" do + strings = "hello".encode("US-ASCII").send(@method, "ello") + strings[0].encoding.should == 
Encoding::US_ASCII + strings[2].encoding.should == Encoding::US_ASCII + + strings = "hello".encode("US-ASCII").send(@method, /ello/) + strings[0].encoding.should == Encoding::US_ASCII + strings[2].encoding.should == Encoding::US_ASCII + end + + it "returns the matching part in the separator's encoding" do + strings = "hello".encode("US-ASCII").send(@method, "ello") + strings[1].encoding.should == Encoding::UTF_8 + end end diff --git a/spec/ruby/core/string/shared/slice.rb b/spec/ruby/core/string/shared/slice.rb index 713234fffd..a7c1d05b56 100644 --- a/spec/ruby/core/string/shared/slice.rb +++ b/spec/ruby/core/string/shared/slice.rb @@ -80,7 +80,7 @@ describe :string_slice_index_length, shared: true do "hello there".send(@method, -3,2).should == "er" end - it "returns a string with the same encoding" do + it "returns a string with the same encoding as self" do s = "hello there" s.send(@method, 1, 9).encoding.should == s.encoding @@ -206,6 +206,10 @@ describe :string_slice_range, shared: true do "x".send(@method, 1..-1).should == "" end + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, 1..1).encoding.should == Encoding::US_ASCII + end + it "returns nil if the beginning of the range falls outside of self" do "hello there".send(@method, 12..-1).should == nil "hello there".send(@method, 20..25).should == nil @@ -328,7 +332,8 @@ describe :string_slice_regexp, shared: true do "hello there".send(@method, /xyz/).should == nil end - not_supported_on :opal do + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/).encoding.should == Encoding::US_ASCII end ruby_version_is ''...'3.0' do @@ -391,6 +396,10 @@ describe :string_slice_regexp_index, shared: true do $~[1].should == nil end + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/, 0).encoding.should == Encoding::US_ASCII + end + it "calls 
to_int on the given index" do obj = mock('2') obj.should_receive(:to_int).and_return(2) diff --git a/spec/ruby/core/string/shared/strip.rb b/spec/ruby/core/string/shared/strip.rb index 9c232b4694..0c0aae20f3 100644 --- a/spec/ruby/core/string/shared/strip.rb +++ b/spec/ruby/core/string/shared/strip.rb @@ -2,6 +2,10 @@ require_relative '../../../spec_helper' require_relative '../fixtures/classes' describe :string_strip, shared: true do + it "returns a String in the same encoding as self" do + " hello ".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII + end + ruby_version_is '3.0' do it "returns String instances when called on a subclass" do StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(String) diff --git a/spec/ruby/core/string/shared/succ.rb b/spec/ruby/core/string/shared/succ.rb index 66edf6dc82..3605fa99a2 100644 --- a/spec/ruby/core/string/shared/succ.rb +++ b/spec/ruby/core/string/shared/succ.rb @@ -74,6 +74,10 @@ describe :string_succ, shared: true do StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(String) end end + + it "returns a String in the same encoding as self" do + "z".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII + end end describe :string_succ_bang, shared: true do diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb index 0417486692..519c5d845d 100644 --- a/spec/ruby/core/string/split_spec.rb +++ b/spec/ruby/core/string/split_spec.rb @@ -246,6 +246,13 @@ describe "String#split with String" do it "doesn't split on non-ascii whitespace" do "a\u{2008}b".split(" ").should == ["a\u{2008}b"] end + + it "returns Strings in the same encoding as self" do + strings = "hello world".encode("US-ASCII").split(" ") + + strings[0].encoding.should == Encoding::US_ASCII + strings[1].encoding.should == Encoding::US_ASCII + end end describe "String#split with Regexp" do @@ -443,13 +450,12 @@ describe "String#split with Regexp" do end end 
- it "retains the encoding of the source string" do + it "returns Strings in the same encoding as self" do ary = "а б в".split encodings = ary.map { |s| s.encoding } encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8] end - it "splits a string on each character for a multibyte encoding and empty split" do "That's why efficiency could not be helped".split("").size.should == 39 end @@ -598,4 +604,11 @@ describe "String#split with Regexp" do -> { "hello".split(false) }.should raise_error(TypeError) -> { "hello".split(Object.new) }.should raise_error(TypeError) end + + it "returns Strings in the same encoding as self" do + strings = "hello world".encode("US-ASCII").split(/ /) + + strings[0].encoding.should == Encoding::US_ASCII + strings[1].encoding.should == Encoding::US_ASCII + end end diff --git a/spec/ruby/core/string/squeeze_spec.rb b/spec/ruby/core/string/squeeze_spec.rb index 5dc12a4247..2f3fa65745 100644 --- a/spec/ruby/core/string/squeeze_spec.rb +++ b/spec/ruby/core/string/squeeze_spec.rb @@ -64,6 +64,11 @@ describe "String#squeeze" do "hello room".squeeze(other_string, other_string2).should == "hello rom" end + it "returns a String in the same encoding as self" do + "yellow moon".encode("US-ASCII").squeeze.encoding.should == Encoding::US_ASCII + "yellow moon".encode("US-ASCII").squeeze("a").encoding.should == Encoding::US_ASCII + end + it "raises a TypeError when one set arg can't be converted to a string" do -> { "hello world".squeeze([]) }.should raise_error(TypeError) -> { "hello world".squeeze(Object.new)}.should raise_error(TypeError) diff --git a/spec/ruby/core/string/swapcase_spec.rb b/spec/ruby/core/string/swapcase_spec.rb index 6307a1eaaf..d369ab3e4e 100644 --- a/spec/ruby/core/string/swapcase_spec.rb +++ b/spec/ruby/core/string/swapcase_spec.rb @@ -9,6 +9,10 @@ describe "String#swapcase" do "+++---111222???".swapcase.should == "+++---111222???" 
end + it "returns a String in the same encoding as self" do + "Hello".encode("US-ASCII").swapcase.encoding.should == Encoding::US_ASCII + end + describe "full Unicode case mapping" do it "works for all of Unicode with no option" do "äÖü".swapcase.should == "ÄöÜ" diff --git a/spec/ruby/core/string/undump_spec.rb b/spec/ruby/core/string/undump_spec.rb index 08058d9bd1..6ff220161c 100644 --- a/spec/ruby/core/string/undump_spec.rb +++ b/spec/ruby/core/string/undump_spec.rb @@ -389,7 +389,7 @@ describe "String#undump" do '"\\bv".force_encoding("UTF-16BE")'.undump.should == "\u0876".encode('utf-16be') end - it "keeps origin encoding" do + it "returns a String in the same encoding as self" do '"foo"'.encode("ISO-8859-1").undump.encoding.should == Encoding::ISO_8859_1 '"foo"'.encode('windows-1251').undump.encoding.should == Encoding::Windows_1251 end diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb index fcabc99731..2cf5ebad34 100644 --- a/spec/ruby/core/string/unpack/b_spec.rb +++ b/spec/ruby/core/string/unpack/b_spec.rb @@ -86,8 +86,18 @@ describe "String#unpack with format 'B'" do ].should be_computed_by(:unpack, "BBB") end - it "ignores NULL bytes between directives" do - "\x80\x00".unpack("B\x00B").should == ["1", "0"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "\x80\x00".unpack("B\x00B").should == ["1", "0"] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x80\x00".unpack("B\x00B") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -182,8 +192,18 @@ describe "String#unpack with format 'b'" do ].should be_computed_by(:unpack, "bbb") end - it "ignores NULL bytes between directives" do - "\x01\x00".unpack("b\x00b").should == ["1", "0"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "\x01\x00".unpack("b\x00b").should == 
["1", "0"] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x00".unpack("b\x00b") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb index ed8caa4895..dbcbacc74d 100644 --- a/spec/ruby/core/string/unpack/c_spec.rb +++ b/spec/ruby/core/string/unpack/c_spec.rb @@ -35,8 +35,18 @@ describe :string_unpack_8bit, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "abc".unpack(unpack_format("\000", 2)).should == [97, 98] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "abc".unpack(unpack_format("\000", 2)).should == [97, 98] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb index f2f5dcf396..ee08d20926 100644 --- a/spec/ruby/core/string/unpack/h_spec.rb +++ b/spec/ruby/core/string/unpack/h_spec.rb @@ -56,8 +56,18 @@ describe "String#unpack with format 'H'" do ].should be_computed_by(:unpack, "HHH") end - it "ignores NULL bytes between directives" do - "\x01\x10".unpack("H\x00H").should == ["0", "1"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "\x01\x10".unpack("H\x00H").should == ["0", "1"] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("H\x00H") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -121,8 +131,18 @@ describe "String#unpack with 
format 'h'" do ].should be_computed_by(:unpack, "hhh") end - it "ignores NULL bytes between directives" do - "\x01\x10".unpack("h\x00h").should == ["1", "0"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "\x01\x10".unpack("h\x00h").should == ["1", "0"] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("h\x00h") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb index f636f4689f..bb5302edc5 100644 --- a/spec/ruby/core/string/unpack/shared/basic.rb +++ b/spec/ruby/core/string/unpack/shared/basic.rb @@ -8,20 +8,6 @@ describe :string_unpack_basic, shared: true do d.should_receive(:to_str).and_return("a"+unpack_format) "abc".unpack(d).should be_an_instance_of(Array) end - - it "raises a TypeError when passed nil" do - -> { "abc".unpack(nil) }.should raise_error(TypeError) - end - - it "raises a TypeError when passed an Integer" do - -> { "abc".unpack(1) }.should raise_error(TypeError) - end - - ruby_version_is "3.1" do - it "starts unpacking from the given offset" do - "abc".unpack("CC", offset: 1).should == [98, 99] - end - end end describe :string_unpack_no_platform, shared: true do @@ -32,18 +18,4 @@ describe :string_unpack_no_platform, shared: true do it "raises an ArgumentError when the format modifier is '!'" do -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError) end - - ruby_version_is "3.1" do - it "raises an ArgumentError when the offset is negative" do - -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError) - end - - it "returns nil if the offset is at the end of the string" do - "a".unpack("C", offset: 1).should == [nil] - end - - it "raises an ArgumentError when the offset is larget than the string" do - -> { "a".unpack("C", offset: 
2) }.should raise_error(ArgumentError) - end - end end diff --git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb index 99bd8a3401..ccddf94f99 100644 --- a/spec/ruby/core/string/unpack/shared/float.rb +++ b/spec/ruby/core/string/unpack/shared/float.rb @@ -56,9 +56,19 @@ describe :string_unpack_float_le, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -123,9 +133,19 @@ describe :string_unpack_float_be, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -193,8 +213,18 @@ describe :string_unpack_double_le, shared: true do 
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -261,8 +291,18 @@ describe :string_unpack_double_be, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb index cbaa743683..ba4f149dad 100644 --- a/spec/ruby/core/string/unpack/shared/integer.rb +++ b/spec/ruby/core/string/unpack/shared/integer.rb @@ -32,8 +32,18 @@ describe :string_unpack_16bit_le, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + 
"abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcd".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -85,8 +95,18 @@ describe :string_unpack_16bit_be, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -139,8 +159,18 @@ describe :string_unpack_32bit_le, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcdefgh".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -193,8 +223,18 @@ describe :string_unpack_32bit_be, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes 
between directives" do + "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "dcbahgfe".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -243,9 +283,19 @@ describe :string_unpack_64bit_le, shared: true do "abc".unpack(unpack_format('*')).should == [] end - it "ignores NULL bytes between directives" do - array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) + array.should == [7523094288207667809, 7233738012216484449] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -305,9 +355,19 @@ describe :string_unpack_64bit_be, shared: true do "abc".unpack(unpack_format('*')).should == [] end - it "ignores NULL bytes between directives" do - array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + array.should == [7523094288207667809, 7233738012216484449] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb 
b/spec/ruby/core/string/unpack/shared/unicode.rb index a2b4e142b2..ce1f29fe87 100644 --- a/spec/ruby/core/string/unpack/shared/unicode.rb +++ b/spec/ruby/core/string/unpack/shared/unicode.rb @@ -50,8 +50,18 @@ describe :string_unpack_unicode, shared: true do "\xc2\x80".unpack("UUUU").should == [0x80] end - it "ignores NULL bytes between directives" do - "\x01\x02".unpack("U\x00U").should == [1, 2] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "\x01\x02".unpack("U\x00U").should == [1, 2] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02".unpack("U\x00U") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/w_spec.rb b/spec/ruby/core/string/unpack/w_spec.rb index 011c75f5c4..b213b32921 100644 --- a/spec/ruby/core/string/unpack/w_spec.rb +++ b/spec/ruby/core/string/unpack/w_spec.rb @@ -15,8 +15,18 @@ describe "String#unpack with directive 'w'" do ].should be_computed_by(:unpack, "w") end - it "ignores NULL bytes between directives" do - "\x01\x02\x03".unpack("w\x00w").should == [1, 2] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + "\x01\x02\x03".unpack("w\x00w").should == [1, 2] + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02\x03".unpack("w\x00w") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb index f59bd92d6a..df830916a3 100644 --- a/spec/ruby/core/string/unpack1_spec.rb +++ b/spec/ruby/core/string/unpack1_spec.rb @@ -15,16 +15,22 @@ describe "String#unpack1" do "ZA".unpack1("B*", offset: 1).should == "01000001" end + it "treats offset as a byte offset" do + "؈".unpack("CC").should == 
[216, 136] + "؈".unpack1("C").should == 216 + "؈".unpack1("C", offset: 1).should == 136 + end + it "raises an ArgumentError when the offset is negative" do - -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError) + -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative") end it "returns nil if the offset is at the end of the string" do "a".unpack1("C", offset: 1).should == nil end - it "raises an ArgumentError when the offset is larget than the string" do - -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError) + it "raises an ArgumentError when the offset is larger than the string bytesize" do + -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string") end end end diff --git a/spec/ruby/core/string/unpack_spec.rb b/spec/ruby/core/string/unpack_spec.rb new file mode 100644 index 0000000000..4ff7d07460 --- /dev/null +++ b/spec/ruby/core/string/unpack_spec.rb @@ -0,0 +1,34 @@ +require_relative '../../spec_helper' + +describe "String#unpack" do + it "raises a TypeError when passed nil" do + -> { "abc".unpack(nil) }.should raise_error(TypeError) + end + + it "raises a TypeError when passed an Integer" do + -> { "abc".unpack(1) }.should raise_error(TypeError) + end + + ruby_version_is "3.1" do + it "starts unpacking from the given offset" do + "abc".unpack("CC", offset: 1).should == [98, 99] + end + + it "traits offset as a bytes offset" do + "؈".unpack("CC").should == [216, 136] + "؈".unpack("CC", offset: 1).should == [136, nil] + end + + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative") + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack("C", offset: 1).should == [nil] + end + + it "raises an ArgumentError when the offset is larget than the string" do + -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError, 
"offset outside of string") + end + end +end
\ No newline at end of file diff --git a/spec/ruby/core/string/upcase_spec.rb b/spec/ruby/core/string/upcase_spec.rb index 209fe73b6e..5ce7b0b95f 100644 --- a/spec/ruby/core/string/upcase_spec.rb +++ b/spec/ruby/core/string/upcase_spec.rb @@ -8,6 +8,10 @@ describe "String#upcase" do "hello".upcase.should == "HELLO" end + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").upcase.encoding.should == Encoding::US_ASCII + end + describe "full Unicode case mapping" do it "works for all of Unicode with no option" do "äöü".upcase.should == "ÄÖÜ" diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb new file mode 100644 index 0000000000..a14c3af830 --- /dev/null +++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb @@ -0,0 +1,214 @@ +# -*- encoding: utf-8 -*- +require_relative '../../../spec_helper' + +describe "String#valid_encoding? and UTF-8" do + def utf8(bytes) + bytes.pack("C*").force_encoding("UTF-8") + end + + describe "1-byte character" do + it "is valid if is in format 0xxxxxxx" do + utf8([0b00000000]).valid_encoding?.should == true + utf8([0b01111111]).valid_encoding?.should == true + end + + it "is not valid if is not in format 0xxxxxxx" do + utf8([0b10000000]).valid_encoding?.should == false + utf8([0b11111111]).valid_encoding?.should == false + end + end + + describe "2-bytes character" do + it "is valid if in format [110xxxxx 10xxxxx]" do + utf8([0b11000010, 0b10000000]).valid_encoding?.should == true + utf8([0b11000010, 0b10111111]).valid_encoding?.should == true + + utf8([0b11011111, 0b10000000]).valid_encoding?.should == true + utf8([0b11011111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 110xxxxx" do + utf8([0b00000010, 0b10000000]).valid_encoding?.should == false + utf8([0b00100010, 0b10000000]).valid_encoding?.should == false + utf8([0b01000010, 0b10000000]).valid_encoding?.should == false + 
utf8([0b01100010, 0b10000000]).valid_encoding?.should == false + utf8([0b10000010, 0b10000000]).valid_encoding?.should == false + utf8([0b10100010, 0b10000000]).valid_encoding?.should == false + utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11100010, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11000010, 0b00000000]).valid_encoding?.should == false + utf8([0b11000010, 0b01000000]).valid_encoding?.should == false + utf8([0b11000010, 0b11000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxxxx10 xx000000] (codepoints < U+007F, that are encoded with the 1-byte format)" do + utf8([0b11000000, 0b10111111]).valid_encoding?.should == false + utf8([0b11000001, 0b10111111]).valid_encoding?.should == false + end + + it "is not valid if the first byte is missing" do + bytes = [0b11000010, 0b10000000] + utf8(bytes[1..1]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11000010, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end + + describe "3-bytes character" do + it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do + utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true + utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true + utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 1110xxxx" do + utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01000000, 0b10100000, 
0b10000000]).valid_encoding?.should == false + utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the third byte is not in format 10xxxxxx" do + utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints < U+07FF that are encoded with the 2-byte format)" do + utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 
0b10000001, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do + utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false + utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false + + utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1 + utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1 + end + + it "is not valid if the first byte is missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8(bytes[2..3]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8([bytes[0], bytes[2]]).valid_encoding?.should == false + end + + it "is not valid if the second and the third bytes are missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end + + describe "4-bytes character" do + it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true + utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true + utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 11110xxx" do + utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should 
== false + utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the third byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the forth byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxxx000 xx001000 xx000000 xx000000] (codepoint < U+10000)" do + utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false + 
utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do + utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false + + utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + utf8([0b11110111, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + end + + it "is not valid if the first byte is missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8(bytes[1..3]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false + end + + it "is not valid if the second and the third bytes are missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8([bytes[0], bytes[3]]).valid_encoding?.should == false + end + + it "is not valid if the second, the third and the fourth bytes are missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end +end |