diff options
Diffstat (limited to 'test/csv/parse')
-rw-r--r-- | test/csv/parse/test_convert.rb | 55 | ||||
-rw-r--r-- | test/csv/parse/test_general.rb | 88 | ||||
-rw-r--r-- | test/csv/parse/test_header.rb | 9 | ||||
-rw-r--r-- | test/csv/parse/test_inputs_scanner.rb | 63 | ||||
-rw-r--r-- | test/csv/parse/test_liberal_parsing.rb | 11 | ||||
-rw-r--r-- | test/csv/parse/test_read.rb | 27 |
6 files changed, 244 insertions, 9 deletions
diff --git a/test/csv/parse/test_convert.rb b/test/csv/parse/test_convert.rb index 21d9f20b28..c9195c71d9 100644 --- a/test/csv/parse/test_convert.rb +++ b/test/csv/parse/test_convert.rb @@ -15,6 +15,22 @@ class TestCSVParseConvert < Test::Unit::TestCase @time = Time.utc(2018, 12, 30, 6, 41, 29) @windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y") + + @preserving_converter = lambda do |field, info| + f = field.encode(CSV::ConverterEncoding) + return f if info.quoted? + begin + Integer(f, 10) + rescue + f + end + end + + @quoted_header_converter = lambda do |field, info| + f = field.encode(CSV::ConverterEncoding) + return f if info.quoted? + f.to_sym + end end def test_integer @@ -107,4 +123,43 @@ class TestCSVParseConvert < Test::Unit::TestCase assert_equal([nil, "empty", "a"], CSV.parse_line(',"",a', empty_value: "empty")) end + + def test_quoted_parse_line + row = CSV.parse_line('1,"2",3', converters: @preserving_converter) + assert_equal([1, "2", 3], row) + end + + def test_quoted_parse + expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]] + rows = CSV.parse(<<~CSV, converters: @preserving_converter) + "quoted",unquoted + "109",1 + "10A",2 + CSV + assert_equal(expected, rows) + end + + def test_quoted_alternating_quote + row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter) + assert_equal(['1', 2, '3'], row) + end + + def test_quoted_parse_headers + expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]] + table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter) + "quoted",unquoted + "109",1 + "10A",2 + CSV + assert_equal(expected, table.to_a) + end + + def test_quoted_parse_with_string_headers + expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]] + table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter) + "109",1 + "10A",2 + CSV + assert_equal(expected, table.to_a) + end end diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index c740462c01..902be2ce4a 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -199,6 +199,32 @@ line,5,jkl field_size_limit: 2048 ) end + def test_field_size_limit_max_allowed + column = "abcde" + assert_equal([[column]], + CSV.parse("\"#{column}\"", + field_size_limit: column.size + 1)) + end + + def test_field_size_limit_quote_simple + column = "abcde" + assert_parse_errors_out("\"#{column}\"", + field_size_limit: column.size) + end + + def test_field_size_limit_no_quote_implicitly + column = "abcde" + assert_parse_errors_out("#{column}", + field_size_limit: column.size) + end + + def test_field_size_limit_no_quote_explicitly + column = "abcde" + assert_parse_errors_out("#{column}", + field_size_limit: column.size, + quote_char: nil) + end + def test_field_size_limit_in_extended_column_not_exceeding data = <<~DATA "a","b" @@ -221,6 +247,59 @@ line,5,jkl assert_parse_errors_out(data, field_size_limit: 5) end + def test_max_field_size_controls_lookahead + assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', + max_field_size: 2048 ) + end + + def test_max_field_size_max_allowed + column = "abcde" + assert_equal([[column]], + CSV.parse("\"#{column}\"", + max_field_size: column.size)) + end + + def test_max_field_size_quote_simple + column = "abcde" + assert_parse_errors_out("\"#{column}\"", + max_field_size: column.size - 1) + end + + def test_max_field_size_no_quote_implicitly + column = "abcde" + assert_parse_errors_out("#{column}", + max_field_size: column.size - 1) + end + + def test_max_field_size_no_quote_explicitly + column = "abcde" + assert_parse_errors_out("#{column}", + max_field_size: column.size - 1, + quote_char: nil) + end + + def test_max_field_size_in_extended_column_not_exceeding + data = <<~DATA + "a","b" + " + 2 + ","" + DATA + assert_nothing_raised(CSV::MalformedCSVError) do + CSV.parse(data, max_field_size: 3) + end + end + + def test_max_field_size_in_extended_column_exceeding + data = <<~DATA + "a","b" + " + 2345 + ","" + DATA + assert_parse_errors_out(data, max_field_size: 4) + end + def test_row_sep_auto_cr assert_equal([["a"]], CSV.parse("a\r")) end @@ -246,14 +325,7 @@ line,5,jkl private def assert_parse_errors_out(data, **options) assert_raise(CSV::MalformedCSVError) do - timeout = 0.2 - if defined?(RubyVM::YJIT.enabled?) and RubyVM::YJIT.enabled? - timeout = 1 # for --yjit-call-threshold=1 - end - if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled? - timeout = 5 # for --jit-wait - end - Timeout.timeout(timeout) do + Timeout.timeout(0.2) do CSV.parse(data, **options) fail("Parse didn't error out") end diff --git a/test/csv/parse/test_header.rb b/test/csv/parse/test_header.rb index 481c5107c6..e8c3786d68 100644 --- a/test/csv/parse/test_header.rb +++ b/test/csv/parse/test_header.rb @@ -218,6 +218,13 @@ A,B,C assert_equal([:one, :two_three], csv.headers) end + def test_builtin_symbol_raw_converter + csv = CSV.parse( "a b,c d", headers: true, + return_headers: true, + header_converters: :symbol_raw ) + assert_equal([:"a b", :"c d"], csv.headers) + end + def test_builtin_symbol_converter_with_punctuation csv = CSV.parse( "One, Two & Three ($)", headers: true, return_headers: true, @@ -228,7 +235,7 @@ A,B,C def test_builtin_converters_with_blank_header csv = CSV.parse( "one,,three", headers: true, return_headers: true, - header_converters: [:downcase, :symbol] ) + header_converters: [:downcase, :symbol, :symbol_raw] ) assert_equal([:one, nil, :three], csv.headers) end diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb new file mode 100644 index 0000000000..06e1c845d5 --- /dev/null +++ b/test/csv/parse/test_inputs_scanner.rb @@ -0,0 +1,63 @@ +require_relative "../helper" + +class TestCSVParseInputsScanner < Test::Unit::TestCase + include Helper + + def test_scan_keep_over_chunks_nested_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 2) + scanner.keep_start + assert_equal("abc", scanner.scan_all(/[a-c]+/)) + scanner.keep_start + assert_equal("def", scanner.scan_all(/[d-f]+/)) + scanner.keep_back + scanner.keep_back + assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) + end + + def test_scan_keep_over_chunks_nested_drop_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 3) + scanner.keep_start + assert_equal("ab", scanner.scan(/../)) + scanner.keep_start + assert_equal("c", scanner.scan(/./)) + assert_equal("d", scanner.scan(/./)) + scanner.keep_drop + scanner.keep_back + assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) + end + + def test_each_line_keep_over_chunks_multibyte + input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 1) + each_line = scanner.each_line("\n") + assert_equal("ab\n", each_line.next) + scanner.keep_start + assert_equal("\u{3000}a\n", each_line.next) + scanner.keep_back + assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/)) + end + + def test_each_line_keep_over_chunks_fit_chunk_size + input = CSV::Parser::UnoptimizedStringIO.new("\na") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 1) + each_line = scanner.each_line("\n") + assert_equal("\n", each_line.next) + scanner.keep_start + assert_equal("a", each_line.next) + scanner.keep_back + end +end diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb index 2f7b34689f..5796d10828 100644 --- a/test/csv/parse/test_liberal_parsing.rb +++ b/test/csv/parse/test_liberal_parsing.rb @@ -28,6 +28,17 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase CSV.parse_line(input, liberal_parsing: true)) end + def test_endline_after_quoted_field_end + csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true) + assert_equal(["A"], csv.gets) + error = assert_raise(CSV::MalformedCSVError) do + csv.gets + end + assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.', + error.message) + assert_equal(["C"], csv.gets) + end + def test_quote_after_column_separator error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) diff --git a/test/csv/parse/test_read.rb b/test/csv/parse/test_read.rb new file mode 100644 index 0000000000..ba6fe985a9 --- /dev/null +++ b/test/csv/parse/test_read.rb @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# frozen_string_literal: false + +require_relative "../helper" + +class TestCSVParseRead < Test::Unit::TestCase + extend DifferentOFS + + def test_shift + data = <<-CSV +1 +2 +3 + CSV + csv = CSV.new(data) + assert_equal([ + ["1"], + [["2"], ["3"]], + nil, + ], + [ + csv.shift, + csv.read, + csv.shift, + ]) + end +end |