# From "Dominik Bathon" <dbatml gmx.de>
# http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/188420
# Word-level Markov chain text generator.
#
# The model is trained with #add_text / #add_set and maps every run of
# +order+ consecutive words to the list of words that followed that run in
# the training data. #generate_sentence walks this table to build a sentence.
class MarkovChainer
  # Splits text into sentences. The capture group makes String#split keep
  # the terminator as its own piece, so pieces alternate text / terminator.
  SEPARATORS = /([.!?])/
  # The tokens stored in the chain to mark the end of a sentence.
  TERMINATORS = %w[. ! ?].freeze

  # @return [Integer] number of words that form one state of the chain
  attr_reader :order

  # @param order [Integer] number of preceding words used to pick the next one
  def initialize(order)
    @order = order
    @beginnings = [] # first +order+ words of every learned sentence
    @freq = {}       # [word, ...] (size +order+) => [following word, ...]
  end

  # Learns from a collection of independent text snippets.
  #
  # @param set [#each] snippets; each one is interpolated into a string and
  #   gets a "." appended so that its last sentence is always terminated
  def add_set(set)
    set.each { |snippet| learn("#{snippet}.") }
  end

  # Learns from a block of text. Blank lines (paragraph breaks) count as
  # sentence ends. The argument is not modified, so frozen strings are fine.
  #
  # @param text [String] training text
  def add_text(text)
    # make sure each paragraph ends with some sentence terminator
    learn(text.gsub(/\n\s*\n/m, ".") + ".")
  end

  # Generates one sentence.
  #
  # A sentence that ends with "." is returned without the period; a final
  # "!" or "?" is kept and attached to the last word. Generation also stops
  # after 50 steps or once more than 20 words have been collected.
  #
  # @param res [Array<String>, nil] optional starting words (left unmodified);
  #   when nil a random learned sentence beginning is used
  # @return [String] the generated sentence, "" when nothing has been learned
  #   and no starting words were given
  def generate_sentence(res = nil)
    if res
      res = res.dup # never modify the caller's array
    else
      start = @beginnings.sample
      return "" unless start
      res = start.dup # keep the stored beginning intact
    end

    i = 0
    loop do
      i += 1
      nw = next_word_for(res[-order, order])
      if nw
        res << nw
      else
        # Dead end (unknown state): restart from a random learned n-gram.
        res.concat(random_words.reject { |w| TERMINATORS.include?(w) })
      end
      return format_sentence(res) if TERMINATORS.include?(nw) || i > 50 || res.length > 20
    end
  end

  private

  # Splits +text+ at sentence terminators and learns every sentence.
  def learn(text)
    sentence = ""
    text.split(SEPARATORS).each do |piece|
      if piece =~ SEPARATORS
        add_sentence(sentence, piece)
        sentence = ""
      else
        sentence = piece
      end
    end
  end

  # Joins the words, drops a trailing " ." and attaches a trailing "!" / "?".
  def format_sentence(words)
    words.join(" ").sub(/\s+\.$/, "").sub(/\s+([!?])\z/, '\1')
  end

  # Records one sentence in the frequency table.
  #
  # @param str [String] sentence text without its terminator
  # @param terminator [String] the terminator that ended the sentence
  def add_sentence(str, terminator)
    words = str.scan(/[\w:,\-']+/)
    return unless words.size > order # ignore short sentences
    words << terminator
    # Every window of order + 1 words is one (state, next word) observation.
    words.each_cons(order + 1) do |gram|
      (@freq[gram[0..-2]] ||= []) << gram[-1]
    end
    @beginnings << words[0, order]
  end

  # @param words [Array<String>, nil] current state
  # @return [String, nil] a random recorded follower of +words+, or nil when
  #   the state is unknown
  def next_word_for(words)
    arr = @freq[words]
    arr && arr.sample
  end

  # Picks a random learned state together with one of its followers, so the
  # chain can continue from a state it knows, whatever the order is.
  #
  # @return [Array<String>] +order+ + 1 words, or [] when nothing was learned
  def random_words
    key = @freq.keys.sample
    return [] unless key
    key + [next_word_for(key)]
  end
end


