diff options
author | yui-knk <[email protected]> | 2023-06-13 22:22:44 +0900 |
---|---|---|
committer | Yuichiro Kaneko <[email protected]> | 2023-06-14 10:40:25 +0900 |
commit | 8722342ca9b5f770e678ccc93c4f68351e267c3c (patch) | |
tree | 29fd05bd216b246fee8d8ee7cb0021224259ea02 | |
parent | 5a69e8bcefcb609bf02b278a19cd9bf9018cea82 (diff) |
Lrama v0.5.2
Notes:
Merged: https://github.com/ruby/ruby/pull/7939
-rw-r--r-- | tool/lrama/LEGAL.md | 26 | ||||
-rw-r--r-- | tool/lrama/MIT | 21 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/grammar.rb | 50 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer.rb | 36 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/parser.rb | 15 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/parser/token_scanner.rb | 4 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/version.rb | 2 |
7 files changed, 127 insertions, 27 deletions
diff --git a/tool/lrama/LEGAL.md b/tool/lrama/LEGAL.md new file mode 100644 index 0000000000..b1a15b96b5 --- /dev/null +++ b/tool/lrama/LEGAL.md @@ -0,0 +1,26 @@ +# LEGAL NOTICE INFORMATION + +All the files in this distribution are covered under the MIT License except some files +mentioned below. + +## GNU General Public License version 3 + +These files are licensed under the GNU General Public License version 3. See these files for more information. + +* template/bison/yacc.c +* template/bison/yacc.h + +## Same with Ruby + +These files are licensed same with Ruby. See https://github.com/ruby/ruby/blob/master/COPYING for more information. + +* spec/fixtures/integration/ruby_3_0_5/parse.tmp.y +* spec/fixtures/integration/ruby_3_0_5/y.tab.c +* spec/fixtures/integration/ruby_3_0_5/y.tab.h +* spec/fixtures/integration/ruby_3_1_0/parse.tmp.y +* spec/fixtures/integration/ruby_3_1_0/y.tab.c +* spec/fixtures/integration/ruby_3_1_0/y.tab.h +* spec/fixtures/integration/ruby_3_2_0/parse.tmp.y +* spec/fixtures/integration/ruby_3_2_0/y.tab.c +* spec/fixtures/integration/ruby_3_2_0/y.tab.h + diff --git a/tool/lrama/MIT b/tool/lrama/MIT new file mode 100644 index 0000000000..b23d5210d5 --- /dev/null +++ b/tool/lrama/MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2023 Yuichiro Kaneko + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index 1daec4446b..a13c5807b2 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -155,16 +155,16 @@ module Lrama last_column = ref.last_column case - when ref.number == "$" && ref.type == :dollar # $$ + when ref.value == "$" && ref.type == :dollar # $$ # Omit "<>" member = tag.s_value[1..-2] str = "((*yyvaluep).#{member})" - when ref.number == "$" && ref.type == :at # @$ + when ref.value == "$" && ref.type == :at # @$ str = "(*yylocationp)" when ref.type == :dollar # $n - raise "$#{ref.number} can not be used in %printer." + raise "$#{ref.value} can not be used in %printer." when ref.type == :at # @n - raise "@#{ref.number} can not be used in %printer." + raise "@#{ref.value} can not be used in %printer." else raise "Unexpected. 
#{self}, #{ref}" end @@ -190,19 +190,19 @@ module Lrama last_column = ref.last_column case - when ref.number == "$" && ref.type == :dollar # $$ + when ref.value == "$" && ref.type == :dollar # $$ # Omit "<>" member = ref.tag.s_value[1..-2] str = "(yyval.#{member})" - when ref.number == "$" && ref.type == :at # @$ + when ref.value == "$" && ref.type == :at # @$ str = "(yyloc)" when ref.type == :dollar # $n - i = -ref.position_in_rhs + ref.number + i = -ref.position_in_rhs + ref.value # Omit "<>" member = ref.tag.s_value[1..-2] str = "(yyvsp[#{i}].#{member})" when ref.type == :at # @n - i = -ref.position_in_rhs + ref.number + i = -ref.position_in_rhs + ref.value str = "(yylsp[#{i}])" else raise "Unexpected. #{self}, #{ref}" @@ -226,14 +226,14 @@ module Lrama last_column = ref.last_column case - when ref.number == "$" && ref.type == :dollar # $$ + when ref.value == "$" && ref.type == :dollar # $$ str = "yylval" - when ref.number == "$" && ref.type == :at # @$ + when ref.value == "$" && ref.type == :at # @$ str = "yylloc" when ref.type == :dollar # $n - raise "$#{ref.number} can not be used in initial_action." + raise "$#{ref.value} can not be used in initial_action." when ref.type == :at # @n - raise "@#{ref.number} can not be used in initial_action." + raise "@#{ref.value} can not be used in initial_action." else raise "Unexpected. #{self}, #{ref}" end @@ -247,7 +247,7 @@ module Lrama # type: :dollar or :at # ex_tag: "$<tag>1" (Optional) - Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do + Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do def tag if ex_tag ex_tag @@ -382,8 +382,8 @@ module Lrama end def build_references(token_code) - token_code.references.map! 
do |type, number, tag, first_column, last_column| - Reference.new(type: type, number: number, ex_tag: tag, first_column: first_column, last_column: last_column) + token_code.references.map! do |type, value, tag, first_column, last_column| + Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column) end token_code @@ -627,15 +627,23 @@ module Lrama ref.position_in_rhs = i - 1 next if ref.type == :at # $$, $n, @$, @n can be used in any actions - number = ref.number - if number == "$" + if ref.value == "$" # TODO: Should be postponed after middle actions are extracted? ref.referring_symbol = lhs - else - raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i - rhs1[number - 1].referred = true - ref.referring_symbol = rhs1[number - 1] + elsif ref.value.is_a?(Integer) + raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i + rhs1[ref.value - 1].referred = true + ref.referring_symbol = rhs1[ref.value - 1] + elsif ref.value.is_a?(String) + target_tokens = ([lhs] + rhs1 + [code]).compact.first(i) + referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) } + raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2 + raise "Referring symbol `#{ref.value}` is not found. 
#{token}" if referring_symbol_candidate.count == 0 + + referring_symbol = referring_symbol_candidate.first + referring_symbol.referred = true + ref.referring_symbol = referring_symbol end end end diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb index 6c1139b416..fd79a46bfc 100644 --- a/tool/lrama/lib/lrama/lexer.rb +++ b/tool/lrama/lib/lrama/lexer.rb @@ -7,7 +7,7 @@ module Lrama include Lrama::Report::Duration # s_value is semantic value - Token = Struct.new(:type, :s_value, keyword_init: true) do + Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do Type = Struct.new(:id, :name, keyword_init: true) attr_accessor :line, :column, :referred @@ -18,6 +18,31 @@ module Lrama "#{super} line: #{line}, column: #{column}" end + def referred_by?(string) + [self.s_value, self.alias].include?(string) + end + + def ==(other) + self.class == other.class && self.type == other.type && self.s_value == other.s_value + end + + def numberize_references(lhs, rhs) + self.references.map! 
{|ref| + ref_name = ref[1] + if ref_name.is_a?(String) && ref_name != '$' + value = + if lhs.referred_by?(ref_name) + '$' + else + rhs.find_index {|token| token.referred_by?(ref_name) } + 1 + end + [ref[0], value, ref[2], ref[3], ref[4]] + else + ref + end + } + end + @i = 0 @types = [] @@ -47,6 +72,7 @@ module Lrama define_type(:Number) # 0 define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there) define_type(:Ident) # api.pure, tNUMBER + define_type(:Named_Ref) # [foo] define_type(:Semicolon) # ; define_type(:Bar) # | define_type(:String) # "str" @@ -166,10 +192,15 @@ module Lrama tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column) when ss.scan(/(<[a-zA-Z0-9_]+>)/) tokens << create_token(Token::Tag, ss[0], line, ss.pos - column) + when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/) + tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column) + tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column) when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/) tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column) when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/) tokens << create_token(Token::Ident, ss[0], line, ss.pos - column) + when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) + tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column) when ss.scan(/%expect/) tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column) when ss.scan(/%define/) @@ -257,6 +288,9 @@ module Lrama when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1] + when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program + tag = ss[1] ? 
create_token(Token::Tag, ss[1], line, str.length) : nil + references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1] when ss.scan(/@\$/) # @$ references << [:at, "$", nil, str.length, str.length + ss[0].length - 1] when ss.scan(/@(\d)+/) # @1 diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb index a2d3b9e0d4..7d83e45dde 100644 --- a/tool/lrama/lib/lrama/parser.rb +++ b/tool/lrama/lib/lrama/parser.rb @@ -175,8 +175,11 @@ module Lrama # LHS lhs = ts.consume!(T::Ident_Colon) # class: lhs.type = T::Ident + if named_ref = ts.consume(T::Named_Ref) + lhs.alias = named_ref.s_value + end - rhs = parse_grammar_rule_rhs(ts, grammar) + rhs = parse_grammar_rule_rhs(ts, grammar, lhs) grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line) @@ -186,7 +189,7 @@ module Lrama # | bar_lineno = ts.current_token.line ts.next - rhs = parse_grammar_rule_rhs(ts, grammar) + rhs = parse_grammar_rule_rhs(ts, grammar, lhs) grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? 
rhs.first.line : bar_lineno) when T::Semicolon # ; @@ -205,13 +208,13 @@ module Lrama end end - def parse_grammar_rule_rhs(ts, grammar) + def parse_grammar_rule_rhs(ts, grammar, lhs) a = [] prec_seen = false code_after_prec = false while true do - # TODO: Srting can be here + # TODO: String can be here case ts.current_type when T::Ident # keyword_class @@ -244,9 +247,13 @@ module Lrama end code = ts.current_token + code.numberize_references(lhs, a) grammar.build_references(code) a << code ts.next + when T::Named_Ref + ts.previous_token.alias = ts.current_token.s_value + ts.next when T::Bar # | break diff --git a/tool/lrama/lib/lrama/parser/token_scanner.rb b/tool/lrama/lib/lrama/parser/token_scanner.rb index b9c1522aff..1ff67b30a3 100644 --- a/tool/lrama/lib/lrama/parser/token_scanner.rb +++ b/tool/lrama/lib/lrama/parser/token_scanner.rb @@ -14,6 +14,10 @@ module Lrama current_token && current_token.type end + def previous_token + @tokens[@index - 1] + end + def next token = current_token @index += 1 diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb index 0054f3d0bd..2adbfd70fb 100644 --- a/tool/lrama/lib/lrama/version.rb +++ b/tool/lrama/lib/lrama/version.rb @@ -1,3 +1,3 @@ module Lrama - VERSION = "0.5.1".freeze + VERSION = "0.5.2".freeze end |