summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- tool/lrama/LEGAL.md 26
-rw-r--r-- tool/lrama/MIT 21
-rw-r--r-- tool/lrama/lib/lrama/grammar.rb 50
-rw-r--r-- tool/lrama/lib/lrama/lexer.rb 36
-rw-r--r-- tool/lrama/lib/lrama/parser.rb 15
-rw-r--r-- tool/lrama/lib/lrama/parser/token_scanner.rb 4
-rw-r--r-- tool/lrama/lib/lrama/version.rb 2
7 files changed, 127 insertions, 27 deletions
diff --git a/tool/lrama/LEGAL.md b/tool/lrama/LEGAL.md
new file mode 100644
index 0000000000..b1a15b96b5
--- /dev/null
+++ b/tool/lrama/LEGAL.md
@@ -0,0 +1,26 @@
+# LEGAL NOTICE INFORMATION
+
+All the files in this distribution are covered under the MIT License except for the files
+listed below.
+
+## GNU General Public License version 3
+
+These files are licensed under the GNU General Public License version 3. See these files for more information.
+
+* template/bison/yacc.c
+* template/bison/yacc.h
+
+## Same as Ruby
+
+These files are licensed under the same terms as Ruby. See https://github.com/ruby/ruby/blob/master/COPYING for more information.
+
+* spec/fixtures/integration/ruby_3_0_5/parse.tmp.y
+* spec/fixtures/integration/ruby_3_0_5/y.tab.c
+* spec/fixtures/integration/ruby_3_0_5/y.tab.h
+* spec/fixtures/integration/ruby_3_1_0/parse.tmp.y
+* spec/fixtures/integration/ruby_3_1_0/y.tab.c
+* spec/fixtures/integration/ruby_3_1_0/y.tab.h
+* spec/fixtures/integration/ruby_3_2_0/parse.tmp.y
+* spec/fixtures/integration/ruby_3_2_0/y.tab.c
+* spec/fixtures/integration/ruby_3_2_0/y.tab.h
+
diff --git a/tool/lrama/MIT b/tool/lrama/MIT
new file mode 100644
index 0000000000..b23d5210d5
--- /dev/null
+++ b/tool/lrama/MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2023 Yuichiro Kaneko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb
index 1daec4446b..a13c5807b2 100644
--- a/tool/lrama/lib/lrama/grammar.rb
+++ b/tool/lrama/lib/lrama/grammar.rb
@@ -155,16 +155,16 @@ module Lrama
last_column = ref.last_column
case
- when ref.number == "$" && ref.type == :dollar # $$
+ when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = tag.s_value[1..-2]
str = "((*yyvaluep).#{member})"
- when ref.number == "$" && ref.type == :at # @$
+ when ref.value == "$" && ref.type == :at # @$
str = "(*yylocationp)"
when ref.type == :dollar # $n
- raise "$#{ref.number} can not be used in %printer."
+ raise "$#{ref.value} can not be used in %printer."
when ref.type == :at # @n
- raise "@#{ref.number} can not be used in %printer."
+ raise "@#{ref.value} can not be used in %printer."
else
raise "Unexpected. #{self}, #{ref}"
end
@@ -190,19 +190,19 @@ module Lrama
last_column = ref.last_column
case
- when ref.number == "$" && ref.type == :dollar # $$
+ when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyval.#{member})"
- when ref.number == "$" && ref.type == :at # @$
+ when ref.value == "$" && ref.type == :at # @$
str = "(yyloc)"
when ref.type == :dollar # $n
- i = -ref.position_in_rhs + ref.number
+ i = -ref.position_in_rhs + ref.value
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyvsp[#{i}].#{member})"
when ref.type == :at # @n
- i = -ref.position_in_rhs + ref.number
+ i = -ref.position_in_rhs + ref.value
str = "(yylsp[#{i}])"
else
raise "Unexpected. #{self}, #{ref}"
@@ -226,14 +226,14 @@ module Lrama
last_column = ref.last_column
case
- when ref.number == "$" && ref.type == :dollar # $$
+ when ref.value == "$" && ref.type == :dollar # $$
str = "yylval"
- when ref.number == "$" && ref.type == :at # @$
+ when ref.value == "$" && ref.type == :at # @$
str = "yylloc"
when ref.type == :dollar # $n
- raise "$#{ref.number} can not be used in initial_action."
+ raise "$#{ref.value} can not be used in initial_action."
when ref.type == :at # @n
- raise "@#{ref.number} can not be used in initial_action."
+ raise "@#{ref.value} can not be used in initial_action."
else
raise "Unexpected. #{self}, #{ref}"
end
@@ -247,7 +247,7 @@ module Lrama
# type: :dollar or :at
# ex_tag: "$<tag>1" (Optional)
- Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
+ Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
def tag
if ex_tag
ex_tag
@@ -382,8 +382,8 @@ module Lrama
end
def build_references(token_code)
- token_code.references.map! do |type, number, tag, first_column, last_column|
- Reference.new(type: type, number: number, ex_tag: tag, first_column: first_column, last_column: last_column)
+ token_code.references.map! do |type, value, tag, first_column, last_column|
+ Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
end
token_code
@@ -627,15 +627,23 @@ module Lrama
ref.position_in_rhs = i - 1
next if ref.type == :at
# $$, $n, @$, @n can be used in any actions
- number = ref.number
- if number == "$"
+ if ref.value == "$"
# TODO: Should be postponed after middle actions are extracted?
ref.referring_symbol = lhs
- else
- raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i
- rhs1[number - 1].referred = true
- ref.referring_symbol = rhs1[number - 1]
+ elsif ref.value.is_a?(Integer)
+ raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
+ rhs1[ref.value - 1].referred = true
+ ref.referring_symbol = rhs1[ref.value - 1]
+ elsif ref.value.is_a?(String)
+ target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
+ referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
+ raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
+ raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
+
+ referring_symbol = referring_symbol_candidate.first
+ referring_symbol.referred = true
+ ref.referring_symbol = referring_symbol
end
end
end
diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb
index 6c1139b416..fd79a46bfc 100644
--- a/tool/lrama/lib/lrama/lexer.rb
+++ b/tool/lrama/lib/lrama/lexer.rb
@@ -7,7 +7,7 @@ module Lrama
include Lrama::Report::Duration
# s_value is semantic value
- Token = Struct.new(:type, :s_value, keyword_init: true) do
+ Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do
Type = Struct.new(:id, :name, keyword_init: true)
attr_accessor :line, :column, :referred
@@ -18,6 +18,31 @@ module Lrama
"#{super} line: #{line}, column: #{column}"
end
+ def referred_by?(string)
+ [self.s_value, self.alias].include?(string)
+ end
+
+ def ==(other)
+ self.class == other.class && self.type == other.type && self.s_value == other.s_value
+ end
+
+ def numberize_references(lhs, rhs)
+ self.references.map! {|ref|
+ ref_name = ref[1]
+ if ref_name.is_a?(String) && ref_name != '$'
+ value =
+ if lhs.referred_by?(ref_name)
+ '$'
+ else
+ rhs.find_index {|token| token.referred_by?(ref_name) } + 1
+ end
+ [ref[0], value, ref[2], ref[3], ref[4]]
+ else
+ ref
+ end
+ }
+ end
+
@i = 0
@types = []
@@ -47,6 +72,7 @@ module Lrama
define_type(:Number) # 0
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
define_type(:Ident) # api.pure, tNUMBER
+ define_type(:Named_Ref) # [foo]
define_type(:Semicolon) # ;
define_type(:Bar) # |
define_type(:String) # "str"
@@ -166,10 +192,15 @@ module Lrama
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
+ tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
+ tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
+ when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
+ tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
when ss.scan(/%expect/)
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
when ss.scan(/%define/)
@@ -257,6 +288,9 @@ module Lrama
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
+ references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@(\d)+/) # @1
diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb
index a2d3b9e0d4..7d83e45dde 100644
--- a/tool/lrama/lib/lrama/parser.rb
+++ b/tool/lrama/lib/lrama/parser.rb
@@ -175,8 +175,11 @@ module Lrama
# LHS
lhs = ts.consume!(T::Ident_Colon) # class:
lhs.type = T::Ident
+ if named_ref = ts.consume(T::Named_Ref)
+ lhs.alias = named_ref.s_value
+ end
- rhs = parse_grammar_rule_rhs(ts, grammar)
+ rhs = parse_grammar_rule_rhs(ts, grammar, lhs)
grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)
@@ -186,7 +189,7 @@ module Lrama
# |
bar_lineno = ts.current_token.line
ts.next
- rhs = parse_grammar_rule_rhs(ts, grammar)
+ rhs = parse_grammar_rule_rhs(ts, grammar, lhs)
grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
when T::Semicolon
# ;
@@ -205,13 +208,13 @@ module Lrama
end
end
- def parse_grammar_rule_rhs(ts, grammar)
+ def parse_grammar_rule_rhs(ts, grammar, lhs)
a = []
prec_seen = false
code_after_prec = false
while true do
- # TODO: Srting can be here
+ # TODO: String can be here
case ts.current_type
when T::Ident
# keyword_class
@@ -244,9 +247,13 @@ module Lrama
end
code = ts.current_token
+ code.numberize_references(lhs, a)
grammar.build_references(code)
a << code
ts.next
+ when T::Named_Ref
+ ts.previous_token.alias = ts.current_token.s_value
+ ts.next
when T::Bar
# |
break
diff --git a/tool/lrama/lib/lrama/parser/token_scanner.rb b/tool/lrama/lib/lrama/parser/token_scanner.rb
index b9c1522aff..1ff67b30a3 100644
--- a/tool/lrama/lib/lrama/parser/token_scanner.rb
+++ b/tool/lrama/lib/lrama/parser/token_scanner.rb
@@ -14,6 +14,10 @@ module Lrama
current_token && current_token.type
end
+ def previous_token
+ @tokens[@index - 1]
+ end
+
def next
token = current_token
@index += 1
diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb
index 0054f3d0bd..2adbfd70fb 100644
--- a/tool/lrama/lib/lrama/version.rb
+++ b/tool/lrama/lib/lrama/version.rb
@@ -1,3 +1,3 @@
module Lrama
- VERSION = "0.5.1".freeze
+ VERSION = "0.5.2".freeze
end