37 files changed, 610 insertions, 78 deletions
diff --git a/spec/ruby/core/string/byteslice_spec.rb b/spec/ruby/core/string/byteslice_spec.rb
index a49da040eb..312229523d 100644
--- a/spec/ruby/core/string/byteslice_spec.rb
+++ b/spec/ruby/core/string/byteslice_spec.rb
@@ -24,4 +24,10 @@ describe "String#byteslice on on non ASCII strings" do
     "\u3042".byteslice(1..2).should == "\x81\x82".force_encoding("UTF-8")
     "\u3042".byteslice(-1).should == "\x82".force_encoding("UTF-8")
   end
+
+  it "returns a String in the same encoding as self" do
+    "ruby".encode("UTF-8").byteslice(0).encoding.should == Encoding::UTF_8
+    "ruby".encode("US-ASCII").byteslice(0).encoding.should == Encoding::US_ASCII
+    "ruby".encode("Windows-1251").byteslice(0).encoding.should == Encoding::Windows_1251
+  end
 end
diff --git a/spec/ruby/core/string/capitalize_spec.rb b/spec/ruby/core/string/capitalize_spec.rb
index 751f4160a6..3f85cf5ae4 100644
--- a/spec/ruby/core/string/capitalize_spec.rb
+++ b/spec/ruby/core/string/capitalize_spec.rb
@@ -91,6 +91,10 @@ describe "String#capitalize" do
       StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(String)
     end
   end
+
+  it "returns a String in the same encoding as self" do
+    "h".encode("US-ASCII").capitalize.encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#capitalize!" do
diff --git a/spec/ruby/core/string/chars_spec.rb b/spec/ruby/core/string/chars_spec.rb
index e4f26bc0cc..715e65dc90 100644
--- a/spec/ruby/core/string/chars_spec.rb
+++ b/spec/ruby/core/string/chars_spec.rb
@@ -1,5 +1,4 @@
 require_relative 'shared/chars'
-require_relative 'shared/each_char_without_block'
 
 describe "String#chars" do
   it_behaves_like :string_chars, :chars
@@ -7,4 +6,10 @@ describe "String#chars" do
   it "returns an array when no block given" do
     "hello".chars.should == ['h', 'e', 'l', 'l', 'o']
   end
+
+  it "returns Strings in the same encoding as self" do
+    "hello".encode("US-ASCII").chars.each do |c|
+      c.encoding.should == Encoding::US_ASCII
+    end
+  end
 end
diff --git a/spec/ruby/core/string/chomp_spec.rb b/spec/ruby/core/string/chomp_spec.rb
index c03bfc7951..d0508d938f 100644
--- a/spec/ruby/core/string/chomp_spec.rb
+++ b/spec/ruby/core/string/chomp_spec.rb
@@ -40,6 +40,10 @@ describe "String#chomp" do
       "".chomp.should == ""
     end
 
+    it "returns a String in the same encoding as self" do
+      "abc\n\n".encode("US-ASCII").chomp.encoding.should == Encoding::US_ASCII
+    end
+
     ruby_version_is ''...'3.0' do
       it "returns subclass instances when called on a subclass" do
         str = StringSpecs::MyString.new("hello\n").chomp
diff --git a/spec/ruby/core/string/chop_spec.rb b/spec/ruby/core/string/chop_spec.rb
index 266d973f67..f598d34bc8 100644
--- a/spec/ruby/core/string/chop_spec.rb
+++ b/spec/ruby/core/string/chop_spec.rb
@@ -60,6 +60,10 @@ describe "String#chop" do
       StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(String)
     end
   end
+
+  it "returns a String in the same encoding as self" do
+    "abc\n\n".encode("US-ASCII").chop.encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#chop!" do
diff --git a/spec/ruby/core/string/clone_spec.rb b/spec/ruby/core/string/clone_spec.rb
index f8d40423f0..a2ba2f9877 100644
--- a/spec/ruby/core/string/clone_spec.rb
+++ b/spec/ruby/core/string/clone_spec.rb
@@ -54,4 +54,8 @@ describe "String#clone" do
     orig.should == "xtring"
     clone.should == "string"
   end
+
+  it "returns a String in the same encoding as self" do
+    "a".encode("US-ASCII").clone.encoding.should == Encoding::US_ASCII
+  end
 end
diff --git a/spec/ruby/core/string/delete_prefix_spec.rb b/spec/ruby/core/string/delete_prefix_spec.rb
index 17ce18bcca..238de85f05 100644
--- a/spec/ruby/core/string/delete_prefix_spec.rb
+++ b/spec/ruby/core/string/delete_prefix_spec.rb
@@ -51,6 +51,10 @@ describe "String#delete_prefix" do
       s.delete_prefix('hell').should be_an_instance_of(String)
     end
   end
+
+  it "returns a String in the same encoding as self" do
+    'hello'.encode("US-ASCII").delete_prefix('hell').encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#delete_prefix!" do
diff --git a/spec/ruby/core/string/delete_spec.rb b/spec/ruby/core/string/delete_spec.rb
index b91e88b76f..87831a9d19 100644
--- a/spec/ruby/core/string/delete_spec.rb
+++ b/spec/ruby/core/string/delete_spec.rb
@@ -95,6 +95,10 @@ describe "String#delete" do
       StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(String)
     end
   end
+
+  it "returns a String in the same encoding as self" do
+    "hello".encode("US-ASCII").delete("lo").encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#delete!" do
diff --git a/spec/ruby/core/string/delete_suffix_spec.rb b/spec/ruby/core/string/delete_suffix_spec.rb
index 0705c73246..6883d6938c 100644
--- a/spec/ruby/core/string/delete_suffix_spec.rb
+++ b/spec/ruby/core/string/delete_suffix_spec.rb
@@ -51,6 +51,10 @@ describe "String#delete_suffix" do
       s.delete_suffix('ello').should be_an_instance_of(String)
     end
   end
+
+  it "returns a String in the same encoding as self" do
+    "hello".encode("US-ASCII").delete_suffix("ello").encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#delete_suffix!" do
diff --git a/spec/ruby/core/string/downcase_spec.rb b/spec/ruby/core/string/downcase_spec.rb
index f0a15f1e25..153b4ce191 100644
--- a/spec/ruby/core/string/downcase_spec.rb
+++ b/spec/ruby/core/string/downcase_spec.rb
@@ -8,6 +8,10 @@ describe "String#downcase" do
     "hello".downcase.should == "hello"
   end
 
+  it "returns a String in the same encoding as self" do
+    "hELLO".encode("US-ASCII").downcase.encoding.should == Encoding::US_ASCII
+  end
+
   describe "full Unicode case mapping" do
     it "works for all of Unicode with no option" do
       "ÄÖÜ".downcase.should == "äöü"
diff --git a/spec/ruby/core/string/dump_spec.rb b/spec/ruby/core/string/dump_spec.rb
index 79a8b55e6d..81de0cfae4 100644
--- a/spec/ruby/core/string/dump_spec.rb
+++ b/spec/ruby/core/string/dump_spec.rb
@@ -350,7 +350,7 @@ describe "String#dump" do
     ].should be_computed_by(:dump)
   end
 
-  it "returns a string with multi-byte UTF-8 characters replaced by \\u{} notation with upper-case hex digits" do
+  it "returns a string with multi-byte UTF-8 characters less than or equal 0xFFFF replaced by \\uXXXX notation with upper-case hex digits" do
     [ [0200.chr('utf-8'), '"\u0080"'],
       [0201.chr('utf-8'), '"\u0081"'],
       [0202.chr('utf-8'), '"\u0082"'],
@@ -382,15 +382,21 @@ describe "String#dump" do
       [0235.chr('utf-8'), '"\u009D"'],
       [0236.chr('utf-8'), '"\u009E"'],
       [0237.chr('utf-8'), '"\u009F"'],
+      [0177777.chr('utf-8'), '"\uFFFF"'],
     ].should be_computed_by(:dump)
   end
 
+  it "returns a string with multi-byte UTF-8 characters greater than 0xFFFF replaced by \\u{XXXXXX} notation with upper-case hex digits" do
+    0x10000.chr('utf-8').dump.should == '"\u{10000}"'
+    0x10FFFF.chr('utf-8').dump.should == '"\u{10FFFF}"'
+  end
+
   it "includes .force_encoding(name) if the encoding isn't ASCII compatible" do
     "\u{876}".encode('utf-16be').dump.should.end_with?(".force_encoding(\"UTF-16BE\")")
     "\u{876}".encode('utf-16le').dump.should.end_with?(".force_encoding(\"UTF-16LE\")")
   end
 
-  it "keeps origin encoding" do
+  it "returns a String in the same encoding as self" do
     "foo".encode("ISO-8859-1").dump.encoding.should == Encoding::ISO_8859_1
     "foo".encode('windows-1251').dump.encoding.should == Encoding::Windows_1251
     1.chr.dump.encoding.should == Encoding::US_ASCII
diff --git a/spec/ruby/core/string/dup_spec.rb b/spec/ruby/core/string/dup_spec.rb
index eec3cf0a70..73f71b8ffc 100644
--- a/spec/ruby/core/string/dup_spec.rb
+++ b/spec/ruby/core/string/dup_spec.rb
@@ -58,4 +58,8 @@ describe "String#dup" do
     orig.should == "c"
     copy.should == "b"
   end
+
+  it "returns a String in the same encoding as self" do
+    "hello".encode("US-ASCII").dup.encoding.should == Encoding::US_ASCII
+  end
 end
diff --git a/spec/ruby/core/string/lines_spec.rb b/spec/ruby/core/string/lines_spec.rb
index ad4b119074..40ab5f71d8 100644
--- a/spec/ruby/core/string/lines_spec.rb
+++ b/spec/ruby/core/string/lines_spec.rb
@@ -1,7 +1,6 @@
 require_relative '../../spec_helper'
 require_relative 'fixtures/classes'
 require_relative 'shared/each_line'
-require_relative 'shared/each_line_without_block'
 
 describe "String#lines" do
   it_behaves_like :string_each_line, :lines
diff --git a/spec/ruby/core/string/reverse_spec.rb b/spec/ruby/core/string/reverse_spec.rb
index 4206b8af90..73526256ef 100644
--- a/spec/ruby/core/string/reverse_spec.rb
+++ b/spec/ruby/core/string/reverse_spec.rb
@@ -37,6 +37,10 @@ describe "String#reverse" do
 
     str.reverse.should == "體黑正\xDE\xDF軟微"
   end
+
+  it "returns a String in the same encoding as self" do
+    "stressed".encode("US-ASCII").reverse.encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#reverse!" do
diff --git a/spec/ruby/core/string/scan_spec.rb b/spec/ruby/core/string/scan_spec.rb
index ab73f5747b..a2d1815132 100644
--- a/spec/ruby/core/string/scan_spec.rb
+++ b/spec/ruby/core/string/scan_spec.rb
@@ -69,6 +69,12 @@ describe "String#scan" do
   it "does not raise any errors when passed a multi-byte string" do
     "あああaaaあああ".scan("あああ").should == ["あああ", "あああ"]
   end
+
+  it "returns Strings in the same encoding as self" do
+    "cruel world".encode("US-ASCII").scan(/\w+/).each do |s|
+      s.encoding.should == Encoding::US_ASCII
+    end
+  end
 end
 
 describe "String#scan with pattern and block" do
diff --git a/spec/ruby/core/string/scrub_spec.rb b/spec/ruby/core/string/scrub_spec.rb
index 66755bcc7b..a51fbd020a 100644
--- a/spec/ruby/core/string/scrub_spec.rb
+++ b/spec/ruby/core/string/scrub_spec.rb
@@ -31,6 +31,11 @@ describe "String#scrub with a default replacement" do
     input.scrub.should == "abc?????"
   end
 
+  it "returns a String in the same encoding as self" do
+    x81 = [0x81].pack('C').force_encoding('utf-8')
+    "abc\u3042#{x81}".scrub.encoding.should == Encoding::UTF_8
+  end
+
   ruby_version_is '3.0' do
     it "returns String instances when called on a subclass" do
       StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String)
@@ -80,6 +85,11 @@ describe "String#scrub with a custom replacement" do
     block.should raise_error(ArgumentError)
   end
 
+  it "returns a String in the same encoding as self" do
+    x81 = [0x81].pack('C').force_encoding('utf-8')
+    "abc\u3042#{x81}".scrub("*").encoding.should == Encoding::UTF_8
+  end
+
   it "raises TypeError when a non String replacement is given" do
     x81 = [0x81].pack('C').force_encoding('utf-8')
     block = -> { "foo#{x81}".scrub(1) }
diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb
index bfedf8f35a..df78bd2186 100644
--- a/spec/ruby/core/string/shared/each_line.rb
+++ b/spec/ruby/core/string/shared/each_line.rb
@@ -122,6 +122,12 @@ describe :string_each_line, shared: true do
     out.should == ["hello\n", "world."]
   end
 
+  it "returns Strings in the same encoding as self" do
+    "one\ntwo\r\nthree".encode("US-ASCII").send(@method) do |s|
+      s.encoding.should == Encoding::US_ASCII
+    end
+  end
+
   it "raises a TypeError when the separator can't be converted to a string" do
     -> { "hello world".send(@method, false) {}     }.should raise_error(TypeError)
     -> { "hello world".send(@method, mock('x')) {} }.should raise_error(TypeError)
diff --git a/spec/ruby/core/string/shared/partition.rb b/spec/ruby/core/string/shared/partition.rb
index 7dc3d9cc0b..41b3c7e0c9 100644
--- a/spec/ruby/core/string/shared/partition.rb
+++ b/spec/ruby/core/string/shared/partition.rb
@@ -33,4 +33,19 @@ describe :string_partition, shared: true do
       end
     end
   end
+
+  it "returns before- and after- parts in the same encoding as self" do
+    strings = "hello".encode("US-ASCII").send(@method, "ello")
+    strings[0].encoding.should == Encoding::US_ASCII
+    strings[2].encoding.should == Encoding::US_ASCII
+
+    strings = "hello".encode("US-ASCII").send(@method, /ello/)
+    strings[0].encoding.should == Encoding::US_ASCII
+    strings[2].encoding.should == Encoding::US_ASCII
+  end
+
+  it "returns the matching part in the separator's encoding" do
+    strings = "hello".encode("US-ASCII").send(@method, "ello")
+    strings[1].encoding.should == Encoding::UTF_8
+  end
 end
diff --git a/spec/ruby/core/string/shared/slice.rb b/spec/ruby/core/string/shared/slice.rb
index 713234fffd..a7c1d05b56 100644
--- a/spec/ruby/core/string/shared/slice.rb
+++ b/spec/ruby/core/string/shared/slice.rb
@@ -80,7 +80,7 @@ describe :string_slice_index_length, shared: true do
     "hello there".send(@method, -3,2).should == "er"
   end
 
-  it "returns a string with the same encoding" do
+  it "returns a string with the same encoding as self" do
     s = "hello there"
     s.send(@method, 1, 9).encoding.should == s.encoding
 
@@ -206,6 +206,10 @@ describe :string_slice_range, shared: true do
     "x".send(@method, 1..-1).should == ""
   end
 
+  it "returns a String in the same encoding as self" do
+    "hello there".encode("US-ASCII").send(@method, 1..1).encoding.should == Encoding::US_ASCII
+  end
+
   it "returns nil if the beginning of the range falls outside of self" do
     "hello there".send(@method, 12..-1).should == nil
     "hello there".send(@method, 20..25).should == nil
@@ -328,7 +332,8 @@ describe :string_slice_regexp, shared: true do
     "hello there".send(@method, /xyz/).should == nil
   end
 
-  not_supported_on :opal do
+  it "returns a String in the same encoding as self" do
+    "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/).encoding.should == Encoding::US_ASCII
   end
 
   ruby_version_is ''...'3.0' do
@@ -391,6 +396,10 @@ describe :string_slice_regexp_index, shared: true do
     $~[1].should == nil
   end
 
+  it "returns a String in the same encoding as self" do
+    "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/, 0).encoding.should == Encoding::US_ASCII
+  end
+
   it "calls to_int on the given index" do
     obj = mock('2')
     obj.should_receive(:to_int).and_return(2)
diff --git a/spec/ruby/core/string/shared/strip.rb b/spec/ruby/core/string/shared/strip.rb
index 9c232b4694..0c0aae20f3 100644
--- a/spec/ruby/core/string/shared/strip.rb
+++ b/spec/ruby/core/string/shared/strip.rb
@@ -2,6 +2,10 @@ require_relative '../../../spec_helper'
 require_relative '../fixtures/classes'
 
 describe :string_strip, shared: true do
+  it "returns a String in the same encoding as self" do
+    " hello ".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII
+  end
+
   ruby_version_is '3.0' do
     it "returns String instances when called on a subclass" do
       StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(String)
diff --git a/spec/ruby/core/string/shared/succ.rb b/spec/ruby/core/string/shared/succ.rb
index 66edf6dc82..3605fa99a2 100644
--- a/spec/ruby/core/string/shared/succ.rb
+++ b/spec/ruby/core/string/shared/succ.rb
@@ -74,6 +74,10 @@ describe :string_succ, shared: true do
       StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(String)
     end
   end
+
+  it "returns a String in the same encoding as self" do
+    "z".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe :string_succ_bang, shared: true do
diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb
index 0417486692..519c5d845d 100644
--- a/spec/ruby/core/string/split_spec.rb
+++ b/spec/ruby/core/string/split_spec.rb
@@ -246,6 +246,13 @@ describe "String#split with String" do
   it "doesn't split on non-ascii whitespace" do
     "a\u{2008}b".split(" ").should == ["a\u{2008}b"]
   end
+
+  it "returns Strings in the same encoding as self" do
+    strings = "hello world".encode("US-ASCII").split(" ")
+
+    strings[0].encoding.should == Encoding::US_ASCII
+    strings[1].encoding.should == Encoding::US_ASCII
+  end
 end
 
 describe "String#split with Regexp" do
@@ -443,13 +450,12 @@ describe "String#split with Regexp" do
     end
   end
 
-  it "retains the encoding of the source string" do
+  it "returns Strings in the same encoding as self" do
     ary = "а б в".split
     encodings = ary.map { |s| s.encoding }
     encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8]
   end
 
-
   it "splits a string on each character for a multibyte encoding and empty split" do
     "That's why eﬃciency could not be helped".split("").size.should == 39
   end
@@ -598,4 +604,11 @@ describe "String#split with Regexp" do
     -> { "hello".split(false) }.should raise_error(TypeError)
     -> { "hello".split(Object.new) }.should raise_error(TypeError)
   end
+
+  it "returns Strings in the same encoding as self" do
+    strings = "hello world".encode("US-ASCII").split(/ /)
+
+    strings[0].encoding.should == Encoding::US_ASCII
+    strings[1].encoding.should == Encoding::US_ASCII
+  end
 end
diff --git a/spec/ruby/core/string/squeeze_spec.rb b/spec/ruby/core/string/squeeze_spec.rb
index 5dc12a4247..2f3fa65745 100644
--- a/spec/ruby/core/string/squeeze_spec.rb
+++ b/spec/ruby/core/string/squeeze_spec.rb
@@ -64,6 +64,11 @@ describe "String#squeeze" do
     "hello room".squeeze(other_string, other_string2).should == "hello rom"
   end
 
+  it "returns a String in the same encoding as self" do
+    "yellow moon".encode("US-ASCII").squeeze.encoding.should == Encoding::US_ASCII
+    "yellow moon".encode("US-ASCII").squeeze("a").encoding.should == Encoding::US_ASCII
+  end
+
   it "raises a TypeError when one set arg can't be converted to a string" do
     -> { "hello world".squeeze([])        }.should raise_error(TypeError)
     -> { "hello world".squeeze(Object.new)}.should raise_error(TypeError)
diff --git a/spec/ruby/core/string/swapcase_spec.rb b/spec/ruby/core/string/swapcase_spec.rb
index 6307a1eaaf..d369ab3e4e 100644
--- a/spec/ruby/core/string/swapcase_spec.rb
+++ b/spec/ruby/core/string/swapcase_spec.rb
@@ -9,6 +9,10 @@ describe "String#swapcase" do
    "+++---111222???".swapcase.should == "+++---111222???"
   end
 
+  it "returns a String in the same encoding as self" do
+    "Hello".encode("US-ASCII").swapcase.encoding.should == Encoding::US_ASCII
+  end
+
   describe "full Unicode case mapping" do
     it "works for all of Unicode with no option" do
       "äÖü".swapcase.should == "ÄöÜ"
diff --git a/spec/ruby/core/string/undump_spec.rb b/spec/ruby/core/string/undump_spec.rb
index 08058d9bd1..6ff220161c 100644
--- a/spec/ruby/core/string/undump_spec.rb
+++ b/spec/ruby/core/string/undump_spec.rb
@@ -389,7 +389,7 @@ describe "String#undump" do
     '"\\bv".force_encoding("UTF-16BE")'.undump.should == "\u0876".encode('utf-16be')
   end
 
-  it "keeps origin encoding" do
+  it "returns a String in the same encoding as self" do
     '"foo"'.encode("ISO-8859-1").undump.encoding.should == Encoding::ISO_8859_1
     '"foo"'.encode('windows-1251').undump.encoding.should == Encoding::Windows_1251
   end
diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb
index fcabc99731..2cf5ebad34 100644
--- a/spec/ruby/core/string/unpack/b_spec.rb
+++ b/spec/ruby/core/string/unpack/b_spec.rb
@@ -86,8 +86,18 @@ describe "String#unpack with format 'B'" do
     ].should be_computed_by(:unpack, "BBB")
   end
 
-  it "ignores NULL bytes between directives" do
-    "\x80\x00".unpack("B\x00B").should == ["1", "0"]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "\x80\x00".unpack("B\x00B").should == ["1", "0"]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x80\x00".unpack("B\x00B")
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -182,8 +192,18 @@ describe "String#unpack with format 'b'" do
     ].should be_computed_by(:unpack, "bbb")
   end
 
-  it "ignores NULL bytes between directives" do
-    "\x01\x00".unpack("b\x00b").should == ["1", "0"]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "\x01\x00".unpack("b\x00b").should == ["1", "0"]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x01\x00".unpack("b\x00b")
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb
index ed8caa4895..dbcbacc74d 100644
--- a/spec/ruby/core/string/unpack/c_spec.rb
+++ b/spec/ruby/core/string/unpack/c_spec.rb
@@ -35,8 +35,18 @@ describe :string_unpack_8bit, shared: true do
     ].should be_computed_by(:unpack, unpack_format(3))
   end
 
-  it "ignores NULL bytes between directives" do
-    "abc".unpack(unpack_format("\000", 2)).should == [97, 98]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "abc".unpack(unpack_format("\000", 2)).should == [97, 98]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "abc".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb
index f2f5dcf396..ee08d20926 100644
--- a/spec/ruby/core/string/unpack/h_spec.rb
+++ b/spec/ruby/core/string/unpack/h_spec.rb
@@ -56,8 +56,18 @@ describe "String#unpack with format 'H'" do
     ].should be_computed_by(:unpack, "HHH")
   end
 
-  it "ignores NULL bytes between directives" do
-    "\x01\x10".unpack("H\x00H").should == ["0", "1"]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "\x01\x10".unpack("H\x00H").should == ["0", "1"]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x01\x10".unpack("H\x00H")
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -121,8 +131,18 @@ describe "String#unpack with format 'h'" do
     ].should be_computed_by(:unpack, "hhh")
   end
 
-  it "ignores NULL bytes between directives" do
-    "\x01\x10".unpack("h\x00h").should == ["1", "0"]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "\x01\x10".unpack("h\x00h").should == ["1", "0"]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x01\x10".unpack("h\x00h")
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb
index f636f4689f..bb5302edc5 100644
--- a/spec/ruby/core/string/unpack/shared/basic.rb
+++ b/spec/ruby/core/string/unpack/shared/basic.rb
@@ -8,20 +8,6 @@ describe :string_unpack_basic, shared: true do
     d.should_receive(:to_str).and_return("a"+unpack_format)
     "abc".unpack(d).should be_an_instance_of(Array)
   end
-
-  it "raises a TypeError when passed nil" do
-    -> { "abc".unpack(nil) }.should raise_error(TypeError)
-  end
-
-  it "raises a TypeError when passed an Integer" do
-    -> { "abc".unpack(1) }.should raise_error(TypeError)
-  end
-
-  ruby_version_is "3.1" do
-    it "starts unpacking from the given offset" do
-      "abc".unpack("CC", offset: 1).should == [98, 99]
-    end
-  end
 end
 
 describe :string_unpack_no_platform, shared: true do
@@ -32,18 +18,4 @@ describe :string_unpack_no_platform, shared: true do
   it "raises an ArgumentError when the format modifier is '!'" do
     -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
   end
-
-  ruby_version_is "3.1" do
-    it "raises an ArgumentError when the offset is negative" do
-      -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError)
-    end
-
-    it "returns nil if the offset is at the end of the string" do
-      "a".unpack("C", offset: 1).should == [nil]
-    end
-
-    it "raises an ArgumentError when the offset is larget than the string" do
-      -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError)
-    end
-  end
 end
diff --git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb
index 99bd8a3401..ccddf94f99 100644
--- a/spec/ruby/core/string/unpack/shared/float.rb
+++ b/spec/ruby/core/string/unpack/shared/float.rb
@@ -56,9 +56,19 @@ describe :string_unpack_float_le, shared: true do
     [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
   end
 
-  it "ignores NULL bytes between directives" do
-    array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2))
-    array.should == [2.9000000953674316, 1.399999976158142]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2))
+      array.should == [2.9000000953674316, 1.399999976158142]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -123,9 +133,19 @@ describe :string_unpack_float_be, shared: true do
     [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
   end
 
-  it "ignores NULL bytes between directives" do
-    array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2))
-    array.should == [2.9000000953674316, 1.399999976158142]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2))
+      array.should == [2.9000000953674316, 1.399999976158142]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -193,8 +213,18 @@ describe :string_unpack_double_le, shared: true do
     [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
   end
 
-  it "ignores NULL bytes between directives" do
-    "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -261,8 +291,18 @@ describe :string_unpack_double_be, shared: true do
     [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true
   end
 
-  it "ignores NULL bytes between directives" do
-    "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb
index cbaa743683..ba4f149dad 100644
--- a/spec/ruby/core/string/unpack/shared/integer.rb
+++ b/spec/ruby/core/string/unpack/shared/integer.rb
@@ -32,8 +32,18 @@ describe :string_unpack_16bit_le, shared: true do
     ].should be_computed_by(:unpack, unpack_format(3))
   end
 
-  it "ignores NULL bytes between directives" do
-    "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "abcd".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -85,8 +95,18 @@ describe :string_unpack_16bit_be, shared: true do
     ].should be_computed_by(:unpack, unpack_format(3))
   end
 
-  it "ignores NULL bytes between directives" do
-    "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "badc".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -139,8 +159,18 @@ describe :string_unpack_32bit_le, shared: true do
     ].should be_computed_by(:unpack, unpack_format(3))
   end
 
-  it "ignores NULL bytes between directives" do
-    "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "abcdefgh".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -193,8 +223,18 @@ describe :string_unpack_32bit_be, shared: true do
     ].should be_computed_by(:unpack, unpack_format(3))
   end
 
-  it "ignores NULL bytes between directives" do
-    "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raise ArgumentError for NULL bytes between directives" do
+      -> {
+        "dcbahgfe".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -243,9 +283,19 @@ describe :string_unpack_64bit_le, shared: true do
     "abc".unpack(unpack_format('*')).should == []
   end
 
-  it "ignores NULL bytes between directives" do
-    array = "abcdefghabghefcd".unpack(unpack_format("\000", 2))
-    array.should == [7523094288207667809, 7233738012216484449]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      array = "abcdefghabghefcd".unpack(unpack_format("\000", 2))
+      array.should == [7523094288207667809, 7233738012216484449]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raises ArgumentError for NULL bytes between directives" do
+      -> {
+        "abcdefghabghefcd".unpack(unpack_format("\000", 2)) # 16-byte fixture: enough data for two 64-bit directives
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
@@ -305,9 +355,19 @@ describe :string_unpack_64bit_be, shared: true do
     "abc".unpack(unpack_format('*')).should == []
   end
 
-  it "ignores NULL bytes between directives" do
-    array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2))
-    array.should == [7523094288207667809, 7233738012216484449]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2))
+      array.should == [7523094288207667809, 7233738012216484449]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raises ArgumentError for NULL bytes between directives" do
+      -> {
+        "hgfedcbadcfehgba".unpack(unpack_format("\000", 2))
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb b/spec/ruby/core/string/unpack/shared/unicode.rb
index a2b4e142b2..ce1f29fe87 100644
--- a/spec/ruby/core/string/unpack/shared/unicode.rb
+++ b/spec/ruby/core/string/unpack/shared/unicode.rb
@@ -50,8 +50,18 @@ describe :string_unpack_unicode, shared: true do
     "\xc2\x80".unpack("UUUU").should == [0x80]
   end
 
-  it "ignores NULL bytes between directives" do
-    "\x01\x02".unpack("U\x00U").should == [1, 2]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "\x01\x02".unpack("U\x00U").should == [1, 2]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raises ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x01\x02".unpack("U\x00U")
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack/w_spec.rb b/spec/ruby/core/string/unpack/w_spec.rb
index 011c75f5c4..b213b32921 100644
--- a/spec/ruby/core/string/unpack/w_spec.rb
+++ b/spec/ruby/core/string/unpack/w_spec.rb
@@ -15,8 +15,18 @@ describe "String#unpack with directive 'w'" do
     ].should be_computed_by(:unpack, "w")
   end
 
-  it "ignores NULL bytes between directives" do
-    "\x01\x02\x03".unpack("w\x00w").should == [1, 2]
+  ruby_version_is ""..."3.3" do
+    it "ignores NULL bytes between directives" do
+      "\x01\x02\x03".unpack("w\x00w").should == [1, 2]
+    end
+  end
+
+  ruby_version_is "3.3" do
+    it "raises ArgumentError for NULL bytes between directives" do
+      -> {
+        "\x01\x02\x03".unpack("w\x00w")
+      }.should raise_error(ArgumentError, /unknown unpack directive/)
+    end
   end
 
   it "ignores spaces between directives" do
diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb
index f59bd92d6a..df830916a3 100644
--- a/spec/ruby/core/string/unpack1_spec.rb
+++ b/spec/ruby/core/string/unpack1_spec.rb
@@ -15,16 +15,22 @@ describe "String#unpack1" do
       "ZA".unpack1("B*", offset: 1).should == "01000001"
     end
 
+    it "treats offset as a bytes offset" do
+      "؈".unpack("CC").should == [216, 136] # the character occupies two bytes
+      "؈".unpack1("C").should == 216
+      "؈".unpack1("C", offset: 1).should == 136
+    end
+
     it "raises an ArgumentError when the offset is negative" do
-      -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError)
+      -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative")
     end
 
     it "returns nil if the offset is at the end of the string" do
       "a".unpack1("C", offset: 1).should == nil
     end
 
-    it "raises an ArgumentError when the offset is larget than the string" do
-      -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError)
+    it "raises an ArgumentError when the offset is larger than the string bytesize" do
+      -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string")
     end
   end
 end
diff --git a/spec/ruby/core/string/unpack_spec.rb b/spec/ruby/core/string/unpack_spec.rb
new file mode 100644
index 0000000000..4ff7d07460
--- /dev/null
+++ b/spec/ruby/core/string/unpack_spec.rb
@@ -0,0 +1,34 @@
+require_relative '../../spec_helper'
+
+describe "String#unpack" do
+  it "raises a TypeError when passed nil" do
+    -> { "abc".unpack(nil) }.should raise_error(TypeError)
+  end
+
+  it "raises a TypeError when passed an Integer" do
+    -> { "abc".unpack(1) }.should raise_error(TypeError)
+  end
+
+  ruby_version_is "3.1" do
+    it "starts unpacking from the given offset" do
+      "abc".unpack("CC", offset: 1).should == [98, 99]
+    end
+
+    it "treats offset as a bytes offset" do
+      "؈".unpack("CC").should == [216, 136] # the character occupies two bytes
+      "؈".unpack("CC", offset: 1).should == [136, nil]
+    end
+
+    it "raises an ArgumentError when the offset is negative" do
+      -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative")
+    end
+
+    it "returns nil if the offset is at the end of the string" do
+      "a".unpack("C", offset: 1).should == [nil]
+    end
+
+    it "raises an ArgumentError when the offset is larger than the string bytesize" do
+      -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string")
+    end
+  end
+end
diff --git a/spec/ruby/core/string/upcase_spec.rb b/spec/ruby/core/string/upcase_spec.rb
index 209fe73b6e..5ce7b0b95f 100644
--- a/spec/ruby/core/string/upcase_spec.rb
+++ b/spec/ruby/core/string/upcase_spec.rb
@@ -8,6 +8,10 @@ describe "String#upcase" do
     "hello".upcase.should == "HELLO"
   end
 
+  it "returns a String in the same encoding as self" do
+    "hello".encode("US-ASCII").upcase.encoding.should == Encoding::US_ASCII
+  end
+
   describe "full Unicode case mapping" do
     it "works for all of Unicode with no option" do
       "äöü".upcase.should == "ÄÖÜ"
diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
new file mode 100644
index 0000000000..a14c3af830
--- /dev/null
+++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb
@@ -0,0 +1,214 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../../spec_helper'
+
+describe "String#valid_encoding? and UTF-8" do
+  def utf8(bytes)
+    bytes.pack("C*").force_encoding("UTF-8")
+  end
+
+  describe "1-byte character" do
+    it "is valid if is in format 0xxxxxxx" do
+      utf8([0b00000000]).valid_encoding?.should == true
+      utf8([0b01111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if is not in format 0xxxxxxx" do
+      utf8([0b10000000]).valid_encoding?.should == false
+      utf8([0b11111111]).valid_encoding?.should == false
+    end
+  end
+
+  describe "2-bytes character" do
+    it "is valid if in format [110xxxxx 10xxxxxx]" do
+      utf8([0b11000010, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11000010, 0b10111111]).valid_encoding?.should == true
+
+      utf8([0b11011111, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11011111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 110xxxxx" do
+      utf8([0b00000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10100010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11100010, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11000010, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11000010, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxxxx10 xx000000] (codepoints <= U+007F, that are encoded with the 1-byte format)" do
+      utf8([0b11000000, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11000001, 0b10111111]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11000010, 0b10000000]
+      utf8(bytes[1..1]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11000010, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+
+  describe "3-bytes character" do
+    it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do
+      utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 1110xxxx" do
+      utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the third byte is not in format 10xxxxxx" do
+      utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10100000, 0b11000000]).valid_encoding?.should == false # 11xxxxxx prefix, mirrors the second-byte example
+    end
+
+    it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints <= U+07FF that are encoded with the 2-byte format)" do
+      utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000001, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do
+      utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false
+      utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false
+
+      utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1
+      utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8(bytes[1..2]).valid_encoding?.should == false # drop only the first byte, keep the other two
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8([bytes[0], bytes[2]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second and the third bytes are missing" do
+      bytes = [0b11100000, 0b10100000, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+
+  describe "4-bytes character" do
+    it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+      utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true
+    end
+
+    it "is not valid if the first byte is not in format 11110xxx" do
+      utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the third byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the fourth byte is not in format 10xxxxxx" do
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes
+      utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is smaller than [xxxxx000 xx001000 xx000000 xx000000] (codepoint < U+10000)" do
+      utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+    end
+
+    it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do
+      utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+      utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false
+
+      utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+      utf8([0b11110111, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the first byte is missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8(bytes[1..3]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second byte is missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second and the third bytes are missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8([bytes[0], bytes[3]]).valid_encoding?.should == false
+    end
+
+    it "is not valid if the second, the third and the fourth bytes are missing" do
+      bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000]
+      utf8(bytes[0..0]).valid_encoding?.should == false
+    end
+  end
+end