summary | refs | log | tree | commit | diff
path: root/test/csv/parse
diff options
context:
space:
mode:
Diffstat (limited to 'test/csv/parse')
-rw-r--r-- test/csv/parse/test_convert.rb 55
-rw-r--r-- test/csv/parse/test_general.rb 88
-rw-r--r-- test/csv/parse/test_header.rb 9
-rw-r--r-- test/csv/parse/test_inputs_scanner.rb 63
-rw-r--r-- test/csv/parse/test_liberal_parsing.rb 11
-rw-r--r-- test/csv/parse/test_read.rb 27
6 files changed, 244 insertions, 9 deletions
diff --git a/test/csv/parse/test_convert.rb b/test/csv/parse/test_convert.rb
index 21d9f20b28..c9195c71d9 100644
--- a/test/csv/parse/test_convert.rb
+++ b/test/csv/parse/test_convert.rb
@@ -15,6 +15,22 @@ class TestCSVParseConvert < Test::Unit::TestCase
@time = Time.utc(2018, 12, 30, 6, 41, 29)
@windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y")
+
+ @preserving_converter = lambda do |field, info|
+ f = field.encode(CSV::ConverterEncoding)
+ return f if info.quoted?
+ begin
+ Integer(f, 10)
+ rescue
+ f
+ end
+ end
+
+ @quoted_header_converter = lambda do |field, info|
+ f = field.encode(CSV::ConverterEncoding)
+ return f if info.quoted?
+ f.to_sym
+ end
end
def test_integer
@@ -107,4 +123,43 @@ class TestCSVParseConvert < Test::Unit::TestCase
assert_equal([nil, "empty", "a"],
CSV.parse_line(',"",a', empty_value: "empty"))
end
+
+ def test_quoted_parse_line
+ row = CSV.parse_line('1,"2",3', converters: @preserving_converter)
+ assert_equal([1, "2", 3], row)
+ end
+
+ def test_quoted_parse
+ expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]]
+ rows = CSV.parse(<<~CSV, converters: @preserving_converter)
+ "quoted",unquoted
+ "109",1
+ "10A",2
+ CSV
+ assert_equal(expected, rows)
+ end
+
+ def test_quoted_alternating_quote
+ row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter)
+ assert_equal(['1', 2, '3'], row)
+ end
+
+ def test_quoted_parse_headers
+ expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]]
+ table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter)
+ "quoted",unquoted
+ "109",1
+ "10A",2
+ CSV
+ assert_equal(expected, table.to_a)
+ end
+
+ def test_quoted_parse_with_string_headers
+ expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]]
+ table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter)
+ "109",1
+ "10A",2
+ CSV
+ assert_equal(expected, table.to_a)
+ end
end
diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb
index c740462c01..902be2ce4a 100644
--- a/test/csv/parse/test_general.rb
+++ b/test/csv/parse/test_general.rb
@@ -199,6 +199,32 @@ line,5,jkl
field_size_limit: 2048 )
end
+ def test_field_size_limit_max_allowed
+ column = "abcde"
+ assert_equal([[column]],
+ CSV.parse("\"#{column}\"",
+ field_size_limit: column.size + 1))
+ end
+
+ def test_field_size_limit_quote_simple
+ column = "abcde"
+ assert_parse_errors_out("\"#{column}\"",
+ field_size_limit: column.size)
+ end
+
+ def test_field_size_limit_no_quote_implicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ field_size_limit: column.size)
+ end
+
+ def test_field_size_limit_no_quote_explicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ field_size_limit: column.size,
+ quote_char: nil)
+ end
+
def test_field_size_limit_in_extended_column_not_exceeding
data = <<~DATA
"a","b"
@@ -221,6 +247,59 @@ line,5,jkl
assert_parse_errors_out(data, field_size_limit: 5)
end
+ def test_max_field_size_controls_lookahead
+ assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
+ max_field_size: 2048 )
+ end
+
+ def test_max_field_size_max_allowed
+ column = "abcde"
+ assert_equal([[column]],
+ CSV.parse("\"#{column}\"",
+ max_field_size: column.size))
+ end
+
+ def test_max_field_size_quote_simple
+ column = "abcde"
+ assert_parse_errors_out("\"#{column}\"",
+ max_field_size: column.size - 1)
+ end
+
+ def test_max_field_size_no_quote_implicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ max_field_size: column.size - 1)
+ end
+
+ def test_max_field_size_no_quote_explicitly
+ column = "abcde"
+ assert_parse_errors_out("#{column}",
+ max_field_size: column.size - 1,
+ quote_char: nil)
+ end
+
+ def test_max_field_size_in_extended_column_not_exceeding
+ data = <<~DATA
+ "a","b"
+ "
+ 2
+ ",""
+ DATA
+ assert_nothing_raised(CSV::MalformedCSVError) do
+ CSV.parse(data, max_field_size: 3)
+ end
+ end
+
+ def test_max_field_size_in_extended_column_exceeding
+ data = <<~DATA
+ "a","b"
+ "
+ 2345
+ ",""
+ DATA
+ assert_parse_errors_out(data, max_field_size: 4)
+ end
+
def test_row_sep_auto_cr
assert_equal([["a"]], CSV.parse("a\r"))
end
@@ -246,14 +325,7 @@ line,5,jkl
private
def assert_parse_errors_out(data, **options)
assert_raise(CSV::MalformedCSVError) do
- timeout = 0.2
- if defined?(RubyVM::YJIT.enabled?) and RubyVM::YJIT.enabled?
- timeout = 1 # for --yjit-call-threshold=1
- end
- if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled?
- timeout = 5 # for --jit-wait
- end
- Timeout.timeout(timeout) do
+ Timeout.timeout(0.2) do
CSV.parse(data, **options)
fail("Parse didn't error out")
end
diff --git a/test/csv/parse/test_header.rb b/test/csv/parse/test_header.rb
index 481c5107c6..e8c3786d68 100644
--- a/test/csv/parse/test_header.rb
+++ b/test/csv/parse/test_header.rb
@@ -218,6 +218,13 @@ A,B,C
assert_equal([:one, :two_three], csv.headers)
end
+ def test_builtin_symbol_raw_converter
+ csv = CSV.parse( "a b,c d", headers: true,
+ return_headers: true,
+ header_converters: :symbol_raw )
+ assert_equal([:"a b", :"c d"], csv.headers)
+ end
+
def test_builtin_symbol_converter_with_punctuation
csv = CSV.parse( "One, Two & Three ($)", headers: true,
return_headers: true,
@@ -228,7 +235,7 @@ A,B,C
def test_builtin_converters_with_blank_header
csv = CSV.parse( "one,,three", headers: true,
return_headers: true,
- header_converters: [:downcase, :symbol] )
+ header_converters: [:downcase, :symbol, :symbol_raw] )
assert_equal([:one, nil, :three], csv.headers)
end
diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb
new file mode 100644
index 0000000000..06e1c845d5
--- /dev/null
+++ b/test/csv/parse/test_inputs_scanner.rb
@@ -0,0 +1,63 @@
+require_relative "../helper"
+
+class TestCSVParseInputsScanner < Test::Unit::TestCase
+ include Helper
+
+ def test_scan_keep_over_chunks_nested_back
+ input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 2)
+ scanner.keep_start
+ assert_equal("abc", scanner.scan_all(/[a-c]+/))
+ scanner.keep_start
+ assert_equal("def", scanner.scan_all(/[d-f]+/))
+ scanner.keep_back
+ scanner.keep_back
+ assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+ end
+
+ def test_scan_keep_over_chunks_nested_drop_back
+ input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 3)
+ scanner.keep_start
+ assert_equal("ab", scanner.scan(/../))
+ scanner.keep_start
+ assert_equal("c", scanner.scan(/./))
+ assert_equal("d", scanner.scan(/./))
+ scanner.keep_drop
+ scanner.keep_back
+ assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+ end
+
+ def test_each_line_keep_over_chunks_multibyte
+ input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 1)
+ each_line = scanner.each_line("\n")
+ assert_equal("ab\n", each_line.next)
+ scanner.keep_start
+ assert_equal("\u{3000}a\n", each_line.next)
+ scanner.keep_back
+ assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/))
+ end
+
+ def test_each_line_keep_over_chunks_fit_chunk_size
+ input = CSV::Parser::UnoptimizedStringIO.new("\na")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 1)
+ each_line = scanner.each_line("\n")
+ assert_equal("\n", each_line.next)
+ scanner.keep_start
+ assert_equal("a", each_line.next)
+ scanner.keep_back
+ end
+end
diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb
index 2f7b34689f..5796d10828 100644
--- a/test/csv/parse/test_liberal_parsing.rb
+++ b/test/csv/parse/test_liberal_parsing.rb
@@ -28,6 +28,17 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
CSV.parse_line(input, liberal_parsing: true))
end
+ def test_endline_after_quoted_field_end
+ csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true)
+ assert_equal(["A"], csv.gets)
+ error = assert_raise(CSV::MalformedCSVError) do
+ csv.gets
+ end
+ assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.',
+ error.message)
+ assert_equal(["C"], csv.gets)
+ end
+
def test_quote_after_column_separator
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
diff --git a/test/csv/parse/test_read.rb b/test/csv/parse/test_read.rb
new file mode 100644
index 0000000000..ba6fe985a9
--- /dev/null
+++ b/test/csv/parse/test_read.rb
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseRead < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_shift
+ data = <<-CSV
+1
+2
+3
+ CSV
+ csv = CSV.new(data)
+ assert_equal([
+ ["1"],
+ [["2"], ["3"]],
+ nil,
+ ],
+ [
+ csv.shift,
+ csv.read,
+ csv.shift,
+ ])
+ end
+end