chart_parser.rb

   1 #!/usr/bin/env ruby
   2
   3 require 'set'
   4 require 'logger'
   5
   6 class Terminal
   7   attr_accessor :token
   8   alias_method :to_s, :token
   9   alias_method :to_tree, :token
  10   def initialize(token)
  11     @token=token
  12   end
  13   def eql?(other)
  14     other.is_a?(Terminal) and other.token==@token
  15   end
  16   alias_method :==, :eql?
  17   alias_method :===, :eql?
  18 end
  19
  20 class Edge
  21   attr_accessor :i,:j,:category,:contents,:sub
  22   def initialize(i,j,category,contents,sub=[])
  23     @i=i
  24     @j=j
  25     @category=category
  26     @contents=contents
  27     @sub=sub
  28   end
  29   def eql?(other)
  30     other.is_a? Edge and hash == other.hash
  31   end
  32   alias_method :==, :eql?
  33   alias_method :===, :eql?
  34   def hash
  35     [@i,@j,@category,@contents,@sub].hash
  36   end
  37   def to_s
  38     "#{active? ? 'A' : 'I'}"+
  39     "<#{@i},#{@j},#{@category},[%s],[%s]>" % [@contents.join(', '),
  40                                               @sub.join(', ')]
  41   end
  42   def active?
  43     unfound and unfound.length != 0
  44   end
  45   def unfound
  46     @contents[@sub.length..-1]
  47   end
  48   def nextWord
  49     unfound[0]
  50   end
  51   def to_tree
  52     #raise Exception if active?
  53     unless @sub.empty?
  54       [@category, @sub.map{|e|e.to_tree}]
  55     else
  56       [@category, @contents.map{|e|e.to_tree}]
  57     end
  58   end
  59 end
  60
  61 class Parser
  62   def initialize(grammar,log=Logger.new(STDERR))
  63     @grammar = grammar
  64     @log = log
  65   end
  66
  67   def parse(text)
  68     chart = Set.new
  69     agenda = []
  70
  71     # Initialise agenda with known words
  72     text.each_with_index do |word,n|
  73       cat = nil
  74       @grammar.each do |cat,rule|
  75         for r in rule
  76           if r == [Terminal.new(word)]
  77             agenda << Edge.new(n,n+1,cat,[],[Terminal.new(word)])
  78           end
  79         end
  80       end
  81     end
  82
  83     until agenda.empty?
  84       # Move one from agenda to chart
  85       chart << agenda.delete_at(0)
  86       # Add to the agenda from edges in the chart
  87       new_edges(chart) do |edge|
  88         unless agenda.include?(edge) or chart.include?(edge)
  89           @log.debug("new_edges adding: #{edge}")
  90           agenda << edge
  91         end
  92       end
  93
  94       agenda.each do |edge|
  95         @log.debug("agenda: #{edge}")
  96       end
  97       chart.each do |edge|
  98         @log.debug("chart: #{edge}")
  99       end
 100       #gets
 101     end
 102
 103     # Return all spanning parses in Array form
 104     chart.select do |edge|
 105       edge.i == 0 and edge.j == text.length
 106     end.map do |edge|
 107       edge.to_tree
 108     end.to_set
 109   end
 110
 111   def new_edges(chart)
 112     bottom_up(chart) do |edge|
 113       yield edge
 114     end
 115     fundamental(chart) do |edge|
 116       yield edge
 117     end
 118   end
 119
 120   def bottom_up(chart)
 121     chart.each do |edge|
 122       #TODO: is this next line required by the algorithm?
 123       next if edge.active?
 124       @grammar.each do |r,w|
 125         w.each do |ww|
 126           if ww[0] == edge.category
 127             newedge = Edge.new(edge.i,edge.i,r,ww)
 128             @log.debug("bottom_up: via #{edge} found #{newedge}")
 129             yield newedge
 130           end
 131         end
 132       end
 133     end
 134   end
 135
 136   def fundamental(chart)
 137     inactive = Set.new
 138     active = Set.new
 139     #TODO: (optimisation)
 140     # This could be removed if the chart was split into inactive and active
 141     chart.each do |edge|
 142       if edge.active?
 143         active << edge
 144       else
 145         inactive << edge
 146       end
 147     end
 148
 149     for a in active
 150       for i in inactive
 151         if a.j == i.i and a.nextWord == i.category
 152           newedge = Edge.new(a.i,i.j,a.category,a.contents,a.sub+[i])
 153           @log.debug("fundamental: via #{a} and #{i} found #{newedge}")
 154           yield newedge
 155         end
 156       end
 157     end
 158   end
 159 end
 160
 161 if __FILE__ == $0
 162
 163   log = Logger.new(STDERR)
 164   log.level = Logger::DEBUG
 165   log.formatter = lambda {|severity,time,progname,msg| "#{severity}: #{msg}\n"}
 166
 167   shouldBe = Set.new([["S", [["NP", [["NP", ["time"]], ["NP", ["flies"]]]],
 168   ["VP", [["V", ["like"]], ["NP", [["Det", ["an"]], ["NP", ["arrow"]]]]]]]],
 169   ["S", [["NP", ["time"]], ["VP", [["V", ["flies"]], ["PP", [["PR", ["like"]],
 170   ["NP", [["Det", ["an"]], ["NP", ["arrow"]]]]]]]]]]])
 171
 172   grammar = {'S'=>[%w{NP VP}],
 173              'VP'=>[%w{V NP},
 174                     %w{V PP}],
 175              'NP'=>[[Terminal.new('time')],
 176                     [Terminal.new('flies')],
 177                     [Terminal.new('arrow')],
 178                     %w{Det NP},
 179                     %w{NP NP}],
 180              'PP'=>[%w{PR NP}],
 181              'PR'=>[[Terminal.new('like')]],
 182              'Det'=>[[Terminal.new('an')]],
 183              'V'=>[[Terminal.new('like')],
 184                    [Terminal.new('flies')]]}
 185
 186   p = Parser.new(grammar,log)
 187   q = p.parse(%w{time flies like an arrow})
 188
 189   if q == shouldBe
 190     puts "Test succeeded!"
 191     puts "Output was #{q.inspect}"
 192   else
 193     puts "Test failed!"
 194     puts "Should have been #{shouldBe.inspect}"
 195     puts "But was actually #{q.inspect}"
 196   end
 197 end