summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/cgi.rb1
-rw-r--r--lib/cgi/core.rb8
-rw-r--r--lib/cgi/escape.rb224
-rw-r--r--lib/cgi/util.rb216
4 files changed, 230 insertions, 219 deletions
diff --git a/lib/cgi.rb b/lib/cgi.rb
index 69c3c4fd24..d6f3ed5b49 100644
--- a/lib/cgi.rb
+++ b/lib/cgi.rb
@@ -294,4 +294,5 @@ end
require 'cgi/core'
require 'cgi/cookie'
require 'cgi/util'
+require 'cgi/escape'
CGI.autoload(:HtmlExtension, 'cgi/html')
diff --git a/lib/cgi/core.rb b/lib/cgi/core.rb
index 62e606837a..e6c19bbaa5 100644
--- a/lib/cgi/core.rb
+++ b/lib/cgi/core.rb
@@ -4,12 +4,12 @@
# generating HTTP responses.
#++
class CGI
- unless const_defined?(:Util)
- module Util
+ unless const_defined?(:Escape)
+ module Escape
@@accept_charset = "UTF-8" # :nodoc:
end
- include Util
- extend Util
+ include Escape
+ extend Escape
end
$CGI_ENV = ENV # for FCGI support
diff --git a/lib/cgi/escape.rb b/lib/cgi/escape.rb
new file mode 100644
index 0000000000..59a310b32d
--- /dev/null
+++ b/lib/cgi/escape.rb
@@ -0,0 +1,224 @@
+# frozen_string_literal: true
+
+class CGI
+ module Escape; end
+ include Escape
+ extend Escape
+end
+
+module CGI::Escape
+ @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
+
+ # URL-encode a string into application/x-www-form-urlencoded.
+ # Space characters (+" "+) are encoded with plus signs (+"+"+)
+ # url_encoded_string = CGI.escape("'Stop!' said Fred")
+ # # => "%27Stop%21%27+said+Fred"
+ def escape(string)
+ encoding = string.encoding
+ buffer = string.b
+ buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
+ '%' + m.unpack('H2' * m.bytesize).join('%').upcase
+ end
+ buffer.tr!(' ', '+')
+ buffer.force_encoding(encoding)
+ end
+
+ # URL-decode an application/x-www-form-urlencoded string with encoding(optional).
+ # string = CGI.unescape("%27Stop%21%27+said+Fred")
+ # # => "'Stop!' said Fred"
+ def unescape(string, encoding = @@accept_charset)
+ str = string.tr('+', ' ')
+ str = str.b
+ str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
+ [m.delete('%')].pack('H*')
+ end
+ str.force_encoding(encoding)
+ str.valid_encoding? ? str : str.force_encoding(string.encoding)
+ end
+
+ # URL-encode a string following RFC 3986
+ # Space characters (+" "+) are encoded with (+"%20"+)
+ # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
+ # # => "%27Stop%21%27%20said%20Fred"
+ def escapeURIComponent(string)
+ encoding = string.encoding
+ buffer = string.b
+ buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
+ '%' + m.unpack('H2' * m.bytesize).join('%').upcase
+ end
+ buffer.force_encoding(encoding)
+ end
+ alias escape_uri_component escapeURIComponent
+
+ # URL-decode a string following RFC 3986 with encoding(optional).
+ # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
+ # # => "'Stop!'+said Fred"
+ def unescapeURIComponent(string, encoding = @@accept_charset)
+ str = string.b
+ str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
+ [m.delete('%')].pack('H*')
+ end
+ str.force_encoding(encoding)
+ str.valid_encoding? ? str : str.force_encoding(string.encoding)
+ end
+
+ alias unescape_uri_component unescapeURIComponent
+
+ # The set of special characters and their escaped values
+ TABLE_FOR_ESCAPE_HTML__ = {
+ "'" => ''',
+ '&' => '&',
+ '"' => '"',
+ '<' => '&lt;',
+ '>' => '&gt;',
+ }
+
+ # Escape special characters in HTML, namely '&\"<>
+ # CGI.escapeHTML('Usage: foo "bar" <baz>')
+ # # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
+ def escapeHTML(string)
+ enc = string.encoding
+ unless enc.ascii_compatible?
+ if enc.dummy?
+ origenc = enc
+ enc = Encoding::Converter.asciicompat_encoding(enc)
+ string = enc ? string.encode(enc) : string.b
+ end
+ table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
+ string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
+ string.encode!(origenc) if origenc
+ string
+ else
+ string = string.b
+ string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
+ string.force_encoding(enc)
+ end
+ end
+
+ # Unescape a string that has been HTML-escaped
+ # CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
+ # # => "Usage: foo \"bar\" <baz>"
+ def unescapeHTML(string)
+ enc = string.encoding
+ unless enc.ascii_compatible?
+ if enc.dummy?
+ origenc = enc
+ enc = Encoding::Converter.asciicompat_encoding(enc)
+ string = enc ? string.encode(enc) : string.b
+ end
+ string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
+ case $1.encode(Encoding::US_ASCII)
+ when 'apos' then "'".encode(enc)
+ when 'amp' then '&'.encode(enc)
+ when 'quot' then '"'.encode(enc)
+ when 'gt' then '>'.encode(enc)
+ when 'lt' then '<'.encode(enc)
+ when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
+ when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
+ end
+ end
+ string.encode!(origenc) if origenc
+ return string
+ end
+ return string unless string.include? '&'
+ charlimit = case enc
+ when Encoding::UTF_8; 0x10ffff
+ when Encoding::ISO_8859_1; 256
+ else 128
+ end
+ string = string.b
+ string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
+ match = $1.dup
+ case match
+ when 'apos' then "'"
+ when 'amp' then '&'
+ when 'quot' then '"'
+ when 'gt' then '>'
+ when 'lt' then '<'
+ when /\A#0*(\d+)\z/
+ n = $1.to_i
+ if n < charlimit
+ n.chr(enc)
+ else
+ "&##{$1};"
+ end
+ when /\A#x([0-9a-f]+)\z/i
+ n = $1.hex
+ if n < charlimit
+ n.chr(enc)
+ else
+ "&#x#{$1};"
+ end
+ else
+ "&#{match};"
+ end
+ end
+ string.force_encoding enc
+ end
+
+ # Synonym for CGI.escapeHTML(str)
+ alias escape_html escapeHTML
+ alias h escapeHTML
+
+ # Synonym for CGI.unescapeHTML(str)
+ alias unescape_html unescapeHTML
+
+ # TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there
+ unless RUBY_ENGINE == 'truffleruby'
+ begin
+ require 'cgi/escape.so'
+ rescue LoadError
+ end
+ end
+
+ # Escape only the tags of certain HTML elements in +string+.
+ #
+ # Takes an element or elements or array of elements. Each element
+ # is specified by the name of the element, without angle brackets.
+ # This matches both the start and the end tag of that element.
+ # The attribute list of the open tag will also be escaped (for
+ # instance, the double-quotes surrounding attribute values).
+ #
+ # print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
+ # # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
+ #
+ # print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
+ # # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
+ def escapeElement(string, *elements)
+ elements = elements[0] if elements[0].kind_of?(Array)
+ unless elements.empty?
+ string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do
+ CGI.escapeHTML($&)
+ end
+ else
+ string
+ end
+ end
+
+ # Undo escaping such as that done by CGI.escapeElement()
+ #
+ # print CGI.unescapeElement(
+ # CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
+ # # "&lt;BR&gt;<A HREF="url"></A>"
+ #
+ # print CGI.unescapeElement(
+ # CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
+ # # "&lt;BR&gt;<A HREF="url"></A>"
+ def unescapeElement(string, *elements)
+ elements = elements[0] if elements[0].kind_of?(Array)
+ unless elements.empty?
+ string.gsub(/&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im) do
+ unescapeHTML($&)
+ end
+ else
+ string
+ end
+ end
+
+ # Synonym for CGI.escapeElement(str)
+ alias escape_element escapeElement
+
+ # Synonym for CGI.unescapeElement(str)
+ alias unescape_element unescapeElement
+
+end
diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb
index 5f12eae130..5d02b9d09f 100644
--- a/lib/cgi/util.rb
+++ b/lib/cgi/util.rb
@@ -4,220 +4,8 @@ class CGI
include Util
extend Util
end
-module CGI::Util
- @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)
-
- # URL-encode a string into application/x-www-form-urlencoded.
- # Space characters (+" "+) are encoded with plus signs (+"+"+)
- # url_encoded_string = CGI.escape("'Stop!' said Fred")
- # # => "%27Stop%21%27+said+Fred"
- def escape(string)
- encoding = string.encoding
- buffer = string.b
- buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
- end
- buffer.tr!(' ', '+')
- buffer.force_encoding(encoding)
- end
-
- # URL-decode an application/x-www-form-urlencoded string with encoding(optional).
- # string = CGI.unescape("%27Stop%21%27+said+Fred")
- # # => "'Stop!' said Fred"
- def unescape(string, encoding = @@accept_charset)
- str = string.tr('+', ' ')
- str = str.b
- str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
- [m.delete('%')].pack('H*')
- end
- str.force_encoding(encoding)
- str.valid_encoding? ? str : str.force_encoding(string.encoding)
- end
-
- # URL-encode a string following RFC 3986
- # Space characters (+" "+) are encoded with (+"%20"+)
- # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
- # # => "%27Stop%21%27%20said%20Fred"
- def escapeURIComponent(string)
- encoding = string.encoding
- buffer = string.b
- buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
- end
- buffer.force_encoding(encoding)
- end
- alias escape_uri_component escapeURIComponent
-
- # URL-decode a string following RFC 3986 with encoding(optional).
- # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
- # # => "'Stop!'+said Fred"
- def unescapeURIComponent(string, encoding = @@accept_charset)
- str = string.b
- str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
- [m.delete('%')].pack('H*')
- end
- str.force_encoding(encoding)
- str.valid_encoding? ? str : str.force_encoding(string.encoding)
- end
-
- alias unescape_uri_component unescapeURIComponent
-
- # The set of special characters and their escaped values
- TABLE_FOR_ESCAPE_HTML__ = {
- "'" => '&#39;',
- '&' => '&amp;',
- '"' => '&quot;',
- '<' => '&lt;',
- '>' => '&gt;',
- }
-
- # Escape special characters in HTML, namely '&\"<>
- # CGI.escapeHTML('Usage: foo "bar" <baz>')
- # # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
- def escapeHTML(string)
- enc = string.encoding
- unless enc.ascii_compatible?
- if enc.dummy?
- origenc = enc
- enc = Encoding::Converter.asciicompat_encoding(enc)
- string = enc ? string.encode(enc) : string.b
- end
- table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
- string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
- string.encode!(origenc) if origenc
- string
- else
- string = string.b
- string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
- string.force_encoding(enc)
- end
- end
-
- # TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there
- unless RUBY_ENGINE == 'truffleruby'
- begin
- require 'cgi/escape'
- rescue LoadError
- end
- end
-
- # Unescape a string that has been HTML-escaped
- # CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
- # # => "Usage: foo \"bar\" <baz>"
- def unescapeHTML(string)
- enc = string.encoding
- unless enc.ascii_compatible?
- if enc.dummy?
- origenc = enc
- enc = Encoding::Converter.asciicompat_encoding(enc)
- string = enc ? string.encode(enc) : string.b
- end
- string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
- case $1.encode(Encoding::US_ASCII)
- when 'apos' then "'".encode(enc)
- when 'amp' then '&'.encode(enc)
- when 'quot' then '"'.encode(enc)
- when 'gt' then '>'.encode(enc)
- when 'lt' then '<'.encode(enc)
- when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
- when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
- end
- end
- string.encode!(origenc) if origenc
- return string
- end
- return string unless string.include? '&'
- charlimit = case enc
- when Encoding::UTF_8; 0x10ffff
- when Encoding::ISO_8859_1; 256
- else 128
- end
- string = string.b
- string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
- match = $1.dup
- case match
- when 'apos' then "'"
- when 'amp' then '&'
- when 'quot' then '"'
- when 'gt' then '>'
- when 'lt' then '<'
- when /\A#0*(\d+)\z/
- n = $1.to_i
- if n < charlimit
- n.chr(enc)
- else
- "&##{$1};"
- end
- when /\A#x([0-9a-f]+)\z/i
- n = $1.hex
- if n < charlimit
- n.chr(enc)
- else
- "&#x#{$1};"
- end
- else
- "&#{match};"
- end
- end
- string.force_encoding enc
- end
-
- # Synonym for CGI.escapeHTML(str)
- alias escape_html escapeHTML
-
- # Synonym for CGI.unescapeHTML(str)
- alias unescape_html unescapeHTML
-
- # Escape only the tags of certain HTML elements in +string+.
- #
- # Takes an element or elements or array of elements. Each element
- # is specified by the name of the element, without angle brackets.
- # This matches both the start and the end tag of that element.
- # The attribute list of the open tag will also be escaped (for
- # instance, the double-quotes surrounding attribute values).
- #
- # print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
- # # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
- #
- # print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
- # # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
- def escapeElement(string, *elements)
- elements = elements[0] if elements[0].kind_of?(Array)
- unless elements.empty?
- string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do
- CGI.escapeHTML($&)
- end
- else
- string
- end
- end
-
- # Undo escaping such as that done by CGI.escapeElement()
- #
- # print CGI.unescapeElement(
- # CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
- # # "&lt;BR&gt;<A HREF="url"></A>"
- #
- # print CGI.unescapeElement(
- # CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
- # # "&lt;BR&gt;<A HREF="url"></A>"
- def unescapeElement(string, *elements)
- elements = elements[0] if elements[0].kind_of?(Array)
- unless elements.empty?
- string.gsub(/&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im) do
- unescapeHTML($&)
- end
- else
- string
- end
- end
-
- # Synonym for CGI.escapeElement(str)
- alias escape_element escapeElement
-
- # Synonym for CGI.unescapeElement(str)
- alias unescape_element unescapeElement
+module CGI::Util
# Format a +Time+ object as a String using the format specified by RFC 1123.
#
# CGI.rfc1123_date(Time.now)
@@ -253,6 +41,4 @@ module CGI::Util
end
lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
end
-
- alias h escapeHTML
end