#!/usr/bin/ruby -w
#!ruby -w
# vim: set filetype=ruby : set sw=2

# An extended grep, with extended functionality including full regular
# expressions, contextual output, highlighting, detection and exclusion of
# nontext files, and complex matching criteria.

# $Id: glark.rb,v 1.2 2006/08/29 22:28:33 jeugenepace Exp $

require "English"
require 'singleton'
require 'pathname'

require 'glark/text'
require 'glark/regexp'
require 'glark/io'
require 'glark/log'
require 'glark/filetest'
require 'glark/options'
require 'glark/extensions'

# Turn off buffering on both standard output streams.
[$stdout, $stderr].each { |stream| stream.sync = true }

# Package name and version string, kept as globals.
$PACKAGE = "glark"
$VERSION = "1.8.0"


# -------------------------------------------------------
# Input file
# -------------------------------------------------------

# A thing that can be grepped.
#
# Keeps the lines read from one input, a per-line display status (+stati+)
# and, when counting is enabled, the number of matches.
class InputFile
  include Loggable

  attr_reader :fname, :stati
  attr_accessor :count, :output, :invert_match

  # cross-platform end of line:   DOS  UNIX  MAC
  ANY_END_OF_LINE = Regexp.new('(?:\r\n|\n|\r)')

  # status recorded for a line once it has been written out
  WRITTEN = "written"

  # fname:: name of the input; also used to re-read the file (see linecount)
  # io::    stream the lines are read from when $/ is "\n"
  # args::  anything answering [] for :after, :before and :output
  def initialize(fname, io, args = GlarkOptions.instance)
    @fname        = fname
    @io           = io
    @stati        = Array.new      # index = line number, value = context character
    @count        = nil
    @extracted    = nil
    @regions      = nil
    @modlines     = nil
    @invert_match = false
    @linecount    = nil
    @readall      = $/ != "\n"
    @lines        = @readall ? IO.readlines(@fname) : Array.new

    @after        = args[:after]
    @before       = args[:before]
    @output       = args[:output]

    # @output is later replaced by a formatter object through the output=
    # accessor, so remember separately whether grep-style output was requested.
    @grep_output  = @output == "grep"

    @matched      = false
  end

  # Number of lines in the file, computed once by re-reading it by name.
  def linecount
    @linecount ||= IO.readlines(@fname).size
  end

  # Whether mark_as_match has been called for this input.
  def matched?
    @matched
  end

  # Yields each record of the input. When $/ is "\n" the records are read
  # lazily from the stream and remembered in @lines; otherwise they were read
  # up front in the constructor.
  def each_line
    if @readall
      @lines.each do |line|
        yield line
      end
    else
      while (line = @io.gets) && line.length > 0
        @lines << line
        yield line
      end
    end
  end

  # Sets the status of lines from..to to ch. A line that already has a status
  # is only overwritten when force is set and it has not been written yet.
  def set_status(from, to, ch, force = false)
    from.upto(to) do |ln|
      if (not @stati[ln]) || (@stati[ln] != WRITTEN && force)
        @stati[ln] = ch
      end
    end
  end

  # Records a match spanning start_line..end_line. When counting, only the
  # counter is incremented; otherwise the matching lines are marked ":" and
  # the surrounding context lines "-" (before) and "+" (after).
  def mark_as_match(start_line, end_line = start_line)
    @matched = true

    # even with multi-line matches (--and expressions), we'll display
    # only the first matching line, not the range between the matches.

    if @grep_output
      end_line = start_line
    end

    if @count
      @count += 1
    else
      st = [0, start_line - @before].max
      set_status(st,           start_line - 1,    "-")
      set_status(start_line,   end_line,          ":",  true)
      set_status(end_line + 1, end_line + @after, "+")
    end
  end

  # Delegates to the output formatter.
  def write_matches(matching, from = nil, to = nil)
    @output.write_matches(matching, from, to)
  end

  # Delegates to the output formatter.
  def write_all
    @output.write_all
  end

  # Returns the lines for this file, separated by end of line sequences.
  def get_lines
    if $/ == "\n"
      @lines
    else
      @extracted ||= begin
                       # This is much easier. Just resplit the whole thing at end of line
                       # sequences.

                       eoline    = "\n"             # should be OS-dependent
                       reallines = @lines.join("").split(ANY_END_OF_LINE)

                       # "\n" after all but the last line
                       extracted = (0 ... (reallines.length - 1)).collect do |lnum|
                         reallines[lnum] + eoline
                       end
                       # an empty input has no last line to append
                       extracted << reallines[-1] unless reallines.empty?

                       if Log.verbose
                         # @extracted is not assigned until this block returns,
                         # so log from the local array
                         extracted.each_with_index do |line, idx|
                           log "extracted[#{idx}]: #{line}"
                         end
                       end
                       extracted
                     end
    end
  end

  # Returns the given line for this file, using the lines produced by
  # get_lines (split at end of line sequences) rather than the records that
  # were read with $/.
  def get_line(lnum)
    log { "lnum: #{lnum}" }
    ln = get_lines()[lnum]
    log { "ln: #{ln}" }
    ln
  end

  # returns the range that is represented by the region number
  def get_range(rnum)
    if $/ == "\n"
      # easy case: range is the range number, unless it is out of range.
      rnum < @lines.length ? (rnum .. rnum) : nil
    else
      unless @regions
        srclines = @modlines ? @modlines : @lines

        @regions = []           # keys = region number; values = range of lines

        lstart = 0
        srclines.each do |line|
          # each end of line sequence in the record accounts for one line
          lend = lstart + line.scan(ANY_END_OF_LINE).size

          @regions << Range.new(lstart, lend - 1)

          lstart = lend
        end
      end

      @regions[rnum]
    end
  end
end


# -------------------------------------------------------
# Binary input file
# -------------------------------------------------------

# An input file with binary content: instead of the matching lines, only a
# one-line notice (or the match count) is reported.
class BinaryFile < InputFile

  # Writes the match count through the output formatter when counting,
  # otherwise a "Binary file ... matches" notice. from and to are accepted
  # for compatibility with InputFile#write_matches and are not used.
  def write_matches(matching, from = nil, to = nil)
    if count
      # write_count is defined by the output formatters, not by input files
      @output.write_count(matching)
    else
      puts "Binary file " + @fname + " matches"
    end
  end

end


# -------------------------------------------------------
# Output
# -------------------------------------------------------

# Common base class of the output formatters; it only mixes in Loggable.
class Results
  include Loggable
  
end

# Base output formatter: writes the lines of an input file according to the
# statuses recorded in InputFile#stati. Subclasses are expected to provide
# write_count, which write_matches calls when the input is counting.
class OutputFormat < Results

  attr_reader :formatted, :infile, :show_file_name, :has_context

  # infile::          the InputFile whose lines are written
  # show_file_names:: whether file names are to be displayed
  def initialize(infile, show_file_names)
    @infile            = infile
    @show_file_name    = show_file_names
    @formatted         = []       # index = line number, value = replacement text for that line
    @has_context       = false

    opts               = GlarkOptions.instance

    @label             = opts.label
    @out               = opts.out
    @show_break        = opts.show_break
    @show_line_numbers = opts.show_line_numbers
  end

  # Prints the line, which is assumed to be 0-indexed, and is thus adjusted by
  # one.
  def print_line_number(lnum)
    @out.printf "%5d ", lnum + 1
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  def print_line(lnum, ch = nil)
    log { "lnum #{lnum}, ch: '#{ch}'" }
    begin
      lnums = @infile.get_range(lnum)
      log { "lnums(#{lnum}): #{lnums}" }
      if lnums
        lnums.each do |ln|
          if show_line_numbers
            print_line_number(ln)
            if ch && has_context
              @out.printf "%s ", ch
            end
          end
          @out.puts get_line_to_print(ln)
        end
      end
    rescue => e
      # printing stays best-effort, but the failure is no longer invisible
      log { "could not print line #{lnum}: #{e}" }
    end
  end

  # Writes the lines between from and to (defaulting to the whole file).
  # When counting, only the count is written. With matching set, lines that
  # have a status and are not yet written are printed; otherwise the lines
  # that are neither written nor marked as a match (":") are printed.
  def write_matches(matching, from, to)
    if @infile.count
      write_count(matching)
    elsif matching
      firstline = from ? from : 0
      lastline  = to   ? to   : @infile.get_lines.length - 1

      (firstline .. lastline).each do |ln|
        if @infile.stati[ln]
          unless @infile.stati[ln] == InputFile::WRITTEN
            # separator before a line whose predecessor has no status
            if firstline > 0 && !@infile.stati[ln - 1] && has_context && @show_break
              @out.puts "  ---"
            end

            print_line(ln, @infile.stati[ln])

            @infile.stati[ln] = InputFile::WRITTEN
          end
        end

      end
    else
      firstline = from ? from : 0
      lastline  = to ? to : @infile.get_lines.length - 1
      (firstline .. lastline).each do |ln|
        if @infile.stati[ln] != InputFile::WRITTEN && @infile.stati[ln] != ":"
          log { "printing #{ln}" }
          print_line(ln)
          @infile.stati[ln] = InputFile::WRITTEN
        end
      end
    end
  end

  # Prints every line of the input file.
  def write_all
    (0 ... @infile.get_lines.length).each do |ln|
      print_line(ln)
    end
  end

  # The text to display for a line: the formatted version when one has been
  # stored, otherwise the line as read from the input file.
  def get_line_to_print(lnum)
    formatted[lnum] || infile.get_line(lnum)
  end

  def show_line_numbers
    @show_line_numbers
  end

end


# -------------------------------------------------------
# Glark output format
# -------------------------------------------------------

# The default glark output: an optional file name header, followed by the
# lines with optional line numbers, context markers and highlighting.
class GlarkOutputFormat < OutputFormat

  def initialize(infile, show_file_names)
    super

    opts = GlarkOptions.instance

    @has_context = opts.after != 0 || opts.before != 0
    @file_header_shown = false
    @highlight = opts.highlight
    # the file name highlighter is only consulted when highlighting is on
    @fname_highlighter = @highlight ? opts.file_highlight : nil
    @lnum_highlighter = opts.line_number_highlight
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  def print_line(lnum, ch = nil)
    log { "lnum #{lnum}, ch: '#{ch}'" }
    begin
      lnums = @infile.get_range(lnum)
      log { "lnums(#{lnum}): #{lnums}" }
      if lnums
        log { "printing" }
        lnums.each do |ln|
          println(ln, ch)
        end
      end
    rescue => e
      # printing stays best-effort, but the failure is no longer invisible
      log { "could not print line #{lnum}: #{e}" }
    end
  end

  # Writes the file name (or the label, when one is set) once per file, if
  # file names are being shown.
  def show_file_header
    if show_file_name && !@file_header_shown
      fname = @label || @infile.fname
      # highlighting may be enabled without a file name highlighter being set
      fname = @fname_highlighter.highlight(fname) if @highlight && @fname_highlighter

      @out.puts fname
    end
    @file_header_shown = true
  end

  # Prints the 1-indexed line number, highlighted when a line number
  # highlighter is configured.
  def print_line_number(lnum)
    if @lnum_highlighter
      lnumstr = (lnum + 1).to_s
      pad = " " * ([5 - lnumstr.length, 0].max)
      @out.print pad + " " + @lnum_highlighter.highlight(lnumstr) + " "
    else
      super
    end
  end

  # Writes the number of matching lines, or of non-matching lines when
  # matching is false.
  def write_count(matching = true)
    ct = matching ? @infile.count : @infile.get_lines.size - @infile.count
    @out.puts "    " + ct.to_s
  end

  def write_matches(matching, from = nil, to = nil)
    show_file_header
    super(matching, from, to)
  end

  def write_all
    show_file_header
    super
  end

  # Prints one line: optional line number, optional context character, then
  # the (possibly formatted) text.
  def println(ln, ch)
    if show_line_numbers
      print_line_number(ln)
    end

    if ch && has_context
      @out.printf "%s ", ch
    end

    line = get_line_to_print(ln)
    log { "line: #{line}" }

    @out.puts line
  end

end


# An output format that records which lines were reported instead of printing
# them.
class GlarkMatchList < GlarkOutputFormat
  attr_reader :matches

  def initialize(infile, show_file_names)
    super
    @matches = []               # index = line number, value = true when reported
  end

  # Flags every line in from..to; nothing is written to the output.
  def write_matches(matching, from, to)
    stack "matching: #{matching}"
    from.upto(to) { |lnum| @matches[lnum] = true }
    log { "matches: #{@matches}" }
  end

end


# -------------------------------------------------------
# Grep output format
# -------------------------------------------------------

# This matches grep, mostly. It is for running within emacs, thus,
# it does not support context or highlighting.
class GrepOutputFormat < OutputFormat

  # Writes "file:count", where the count is of matching lines, or of
  # non-matching lines when matching is false.
  def write_count(matching = true)
    print_file_name
    ct = matching ? @infile.count : @infile.get_lines.length - @infile.count
    @out.puts ct
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  def print_line(lnum, ch = nil)
    ln = get_line_to_print(lnum)

    if ln
      print_file_name
      if show_line_numbers
        @out.printf "%d: ", lnum + 1
      end

      # no newline is added here, unlike the puts used by the other formats
      @out.print ln
    end
  end

  # Writes the label, or the file name when no label is set, followed by a
  # colon, if file names are being shown.
  def print_file_name
    if show_file_name
      fname = @label || @infile.fname
      @out.print fname, ":"
    end
  end

end


# -------------------------------------------------------
# Glark
# -------------------------------------------------------

# The main processor: dispatches each named input according to its type
# (text, binary, directory, ...) and applies the search expression to it.
class Glark 
  include Loggable

  attr_reader :exit_status

  # func::  the expression object; it is applied through func.process(input)
  # files:: the names given on the command line ("-" for standard input)
  def initialize(func, files)
    @opts  = GlarkOptions.instance
    @func  = func
    @searched_files = Array.new          # files searched, so we don't cycle through links

    @files = files

    @show_file_names = (@opts.show_file_names ||
                        (@opts.show_file_names.nil? && 
                         (@opts.label ||
                          @files.size > 1 ||
                          (@files[0] != "-" && FileTester.type(@files[0]) == FileTester::DIRECTORY))))

    @out_class = case @opts.output
                 when "grep"
                   GrepOutputFormat
                 when "ansi", "xterm", nil
                   GlarkOutputFormat
                 when "match"
                   error "output to match list is not yet supported"
                   GlarkMatchList
                   # exit 2
                 end

    @count        = @opts.count
    @invert_match = @opts.invert_match

    @after  = @opts.after
    @before = @opts.before
    @output = @opts.output

    # 0 == matches, 1 == no matches, 2 == error
    @exit_status = @invert_match ? 0 : 1

    # procs taking a file name and returning true when the file is to be skipped
    @skip_methods = Array.new

    if @opts.with_basename || @opts.without_basename
      @skip_methods << Proc.new { |fn| skip?(File.basename(fn), @opts.with_basename, @opts.without_basename) }
    end

    if @opts.with_fullname || @opts.without_fullname
      @skip_methods << Proc.new { |fn| skip?(fn, @opts.with_fullname, @opts.without_fullname) }
    end

    if @opts.size_limit
      @skip_methods << Proc.new { |fn| File.size(fn) > @opts.size_limit }
    end
  end

  # Attaches an output formatter to the input, runs the expression against
  # it, and updates the exit status when the input matched.
  def search_file(input)
    output       = @out_class.new(input, @show_file_names)
    input.output = output

    input.count        = 0    if @count
    input.invert_match = true if @invert_match

    @func.process(input)

    if input.matched?
      @exit_status = @invert_match ? 1 : 0
    end
  end

  # Arguments handed to the InputFile constructors.
  def input_file_args
    {
      :after  => @after,
      :before => @before,
      :output => @output
    }
  end

  # Searches the named file as a BinaryFile, reading it in binary mode.
  def search_binary_file(fname)
    log { "searching binary file #{fname} for #{@func}" }
    f = File.new(fname)
    begin
      f.binmode                # for MSDOS/WinWhatever

      bf = BinaryFile.new(fname, f, input_file_args)
      search_file(bf)
    ensure
      f.close
    end
  end

  # True when the name fails the "with" pattern or matches the "without" one.
  def skip?(name, opts_with, opts_without)
    inc = opts_with    && !opts_with.match(name)
    exc = opts_without &&  opts_without.match(name)
    inc || exc
  end

  # Whether any of the configured skip criteria applies to the file.
  def skipped?(fname)
    @skip_methods.detect { |meth| meth.call(fname) }
  end

  # Searches the named text file, or standard input when the name is "-".
  def search_text(fname)
    if skipped?(fname)
      log { "skipping file: #{fname}" }
    else
      log { "searching text" }
      log { "searching #{fname} for #{@func}" }

      io = fname == "-" ? $stdin : File.new(fname)
      begin
        input = InputFile.new(fname, io, input_file_args)
        search_file(input)
      ensure
        # standard input is not ours to close
        io.close unless io.equal?($stdin)
      end
    end
  end

  # Handles a binary file as directed by the binary_files option: skipped,
  # searched as binary, or searched as if it were text.
  def search_binary(fname)
    if skipped?(fname)
      log { "skipping file: #{fname}" }
    else
      log { "handling binary" }

      case @opts.binary_files
      when "without-match"
        log { "skipping binary file #{fname}" }

      when "binary"
        search_binary_file(fname)

      when "text"
        log { "processing binary file #{fname} as text" }
        search_text(fname)
      end
    end
  end

  # Handles a directory as directed by the directory option. When recursing,
  # inode numbers of the entries are remembered so that an entry is searched
  # only once.
  def search_directory(fname)
    log { "processing directory" }
    case @opts.directory
    when "read"
      write "#{fname}: Is a directory"
    when "recurse"
      log { "recursing into directory #{fname}" }
      begin
        entries = Dir.entries(fname).reject { |x| x == "." || x == ".." }
        entries.each do |e|
          entname = fname + "/" + e
          inode = File.exist?(entname) && File.stat(entname).ino
          if inode && @searched_files.include?(inode)
            Log.verbose && log("file already processed: #{entname}")
          else
            @searched_files << inode if inode
            search(entname)
          end
        end
      rescue Errno::EACCES
        write "directory not readable: #{fname}"
      end
    when "skip"
      log { "skipping directory #{fname}" }
    else
      log { "directory: #{@opts.directory}" }
    end
  end

  def search_unknown(fname)
    warn "unknown file type: #{fname}"
  end

  def search_none(fname)
    write "no such file: #{fname}"
  end

  def search_unreadable(fname)
    log { "skipping unreadable: #{fname}" }
  end

  # Searches the named input, dispatching on the type reported by
  # FileTester. With the exclude_matching option, a file whose name matches
  # the expression itself is skipped.
  def search(name)
    if @opts.exclude_matching
      expr = @opts.expr
      if expr.respond_to?(:re) && expr.re.match(name)
        log { "skipping file #{name} with matching name" }
        return
      else
        log { "not skipping file #{name}" }
      end
    end

    if name == "-" 
      write "reading standard input..."
      search_text("-")
    else
      type = FileTester.type(name)

      case type
      when FileTester::BINARY
        search_binary(name)
      when FileTester::DIRECTORY
        search_directory(name)
      when FileTester::NONE
        search_none(name)
      when FileTester::TEXT
        search_text(name)
      when FileTester::UNKNOWN
        search_unknown(name)
      when FileTester::UNREADABLE
        search_unreadable(name)
      else
        error "type unknown: file: #{name}; type: #{type}"
        exit(-2)
      end
    end
  end
end


# -------------------------------------------------------
# Function Object
# -------------------------------------------------------

# A function object, which can be applied (processed) against a InputFile.
# Subclasses supply evaluate(line, lnum, file), which returns whether the
# given line matches.
class FuncObj
  include Loggable

  attr_accessor :match_line_number, :file, :matches, :invert_match

  def initialize
    @match_line_number = nil
    @matches           = Array.new     # line numbers of the matches in the current file

    opts               = GlarkOptions.instance
    @invert_match      = opts.invert_match
    @display_matches   = !opts.file_names_only && opts.filter && !opts.count
    @range_start       = opts.range_start
    @range_end         = opts.range_end
    @file_names_only   = opts.file_names_only
    @match_limit       = opts.match_limit
    @write_null        = opts.write_null
    @filter            = opts.filter
  end

  # Records lnum as a matching line.
  def add_match(lnum)
    @matches.push(lnum)
  end

  def start_position
    match_line_number
  end

  def end_position
    start_position
  end

  # Forgets the matches of the previous file and remembers the new file name.
  def reset_file(file)
    @match_line_number = nil
    @file              = file
    @matches           = Array.new
  end

  # Converts a range limit to a number: "N%" becomes that percentage of the
  # file's line count, anything else is converted with to_f. Returns nil for
  # a nil limit.
  def range(var, infile)
    return nil unless var

    md = var.match(/([\.\d]+)%/)
    if md
      infile.linecount * md[1].to_f / 100
    else
      var.to_f
    end
  end

  # Applies this expression to every line of infile and writes the results:
  # the file name only, the matching (or, inverted, non-matching) lines, or,
  # when not filtering, all of the lines.
  def process(infile)
    got_match = false
    reset_file(infile.fname)

    rgstart  = range(@range_start, infile)
    rgend    = range(@range_end,   infile)

    lastmatch = 0                # first line not yet handed to write_matches
    nmatches = 0
    lnum = 0
    infile.each_line do |line|
      if ((!rgstart || lnum >= rgstart) && 
          (!rgend   || lnum <= rgend)   &&
          evaluate(line, lnum, infile))

        mark_as_match(infile)
        got_match = true
        nmatches += 1

        if @display_matches
          infile.write_matches(!@invert_match, lastmatch, lnum)
          lastmatch = lnum + 1
        elsif @file_names_only
          # we don't need to match more than once
          break
        end

        if @match_limit && nmatches >= @match_limit
          # we've found the match limit
          break
        end
      end
      lnum += 1
    end

    if @file_names_only
      if got_match != @invert_match
        if @write_null
          print infile.fname + "\0"
        else
          puts infile.fname
        end
      end
    elsif @filter
      if @invert_match
        infile.write_matches(false, 0, lnum)
      elsif got_match
        infile.write_matches(true, 0, lnum)
      end
    else
      infile.write_all
    end
  end

  def mark_as_match(infile)
    infile.mark_as_match(start_position)
  end

  def to_s
    inspect
  end

end


# -------------------------------------------------------
# Regular expression function object
# -------------------------------------------------------

# Applies a regular expression against a InputFile.
class RegexpFuncObj < FuncObj

  attr_reader :re

  # re::    the regular expression to match lines against
  # hlidx:: ordinal of this expression, used to pick a highlight in "multi" mode
  # args::  optional :highlight, :text_highlights and :extract_matches
  def initialize(re, hlidx, args = Hash.new)
    @re              = re
    @file            = nil
    @highlight       = args[:highlight]
    if @highlight
      @text_highlights = args[:text_highlights] || []
      @hlidx           = if @text_highlights.length > 0 && args[:highlight] == "multi"
                           hlidx % @text_highlights.length
                         else
                           0
                         end 
    end

    @extract_matches = args[:extract_matches]

    super()
  end

  # Compares the underlying regular expressions; nil when other has none.
  def <=>(other)
    other.respond_to?(:re) ? @re <=> other.re : nil
  end

  # Equal when other also wraps a regular expression and the two are equal.
  # Objects without a regular expression (such as compound expressions) are
  # simply unequal.
  def ==(other)
    other.respond_to?(:re) && @re == other.re
  end

  def inspect
    @re.inspect
  end

  def match?(line)
    @re.match(line)
  end

  # Matches the line against the regular expression. On a match the line
  # number is recorded, the line is highlighted when highlighting is on, and
  # true is returned. With extract_matches the line is replaced in place by
  # the last element of the match data.
  def evaluate(line, lnum, file)
    if Log.verbose
      log { "evaluating <<<#{line[0 .. -2]}>>>" }
    end

    if md = match?(line)      
      log { "matched" }
      if @extract_matches
        if md.kind_of?(MatchData)
          line.replace(md[-1] + "\n")
        else
          warn "--not does not work with -v"
        end
      end

      @match_line_number = lnum

      if @highlight
        highlight_match(lnum, file)
      end

      add_match(lnum)
      true
    else
      false
    end
  end

  def explain(level = 0)
    " " * level + to_s + "\n"
  end

  # Stores a highlighted version of each line of the matching region in the
  # formatted lines of the file's output. The highlight is chosen by the index
  # of the first capture that took part in the match, or by this expression's
  # own index when no capture did.
  def highlight_match(lnum, file)
    log { "lnum: #{lnum}; file: #{file}" }

    lnums = file.get_range(lnum)
    log { "lnums(#{lnum}): #{lnums}" }
    if lnums
      lnums.each do |ln|
        str = file.output.formatted[ln] || file.get_line(ln)
        if Log.verbose
          log { "file.output.formatted[#{ln}]: #{file.output.formatted[ln]}" }
          log { "file.get_line(#{ln}): #{file.get_line(ln)}" }
          log { "highlighting: #{str}" }
        end

        file.output.formatted[ln] = str.gsub(@re) do |m|
          lastcapts = Regexp.last_match.captures
          miidx = (0 ... lastcapts.length).find { |mi| lastcapts[mi] } || @hlidx

          # there may be more captures than highlights, or no highlights at
          # all; fall back to this expression's highlight, then to plain text
          highlighter = @text_highlights[miidx] || @text_highlights[@hlidx]
          highlighter ? highlighter.highlight(m) : m
        end
      end
    end
  end

end


# -------------------------------------------------------
# Compound expression function object
# -------------------------------------------------------

# Groups the operand expressions that make up a compound expression.
class CompoundExpression < FuncObj

  attr_reader :ops

  # ops:: the operand expressions, in order
  def initialize(*ops)
    @ops  = ops
    @file = nil
    super()
  end

  # Resets every operand, then this expression itself.
  def reset_file(file)
    @ops.each { |operand| operand.reset_file(file) }
    super
  end

  # The starting line recorded by the subclass for the most recent match.
  def start_position
    @last_start
  end

  # Equal when other is of exactly the same class and has equal operands.
  def ==(other)
    other.instance_of?(self.class) && @ops == other.ops
  end

end


# -------------------------------------------------------
# Multi-Or expression function object
# -------------------------------------------------------

# Evaluates every operand against a line; subclasses decide through is_match?
# whether the set of matching operands satisfies the expression, and supply
# operator and criteria for display.
class MultiOrExpression < CompoundExpression

  def evaluate(line, lnum, file)
    # select, so that each operand is evaluated even after one has matched
    hits = @ops.select { |operand| operand.evaluate(line, lnum, file) }
    return false unless is_match?(hits)

    final              = hits.last
    @last_start        = final.start_position
    @last_end          = final.end_position
    @match_line_number = lnum

    add_match(lnum)
    true
  end

  def inspect
    "(#{@ops.map { |operand| operand.to_s }.join(" " + operator + " ")})"
  end

  def end_position
    @last_end
  end

  # Describes the expression, one operand per entry, indented by level.
  def explain(level = 0)
    indent = " " * level
    header = indent + criteria + ":\n"
    header + @ops.map { |operand| operand.explain(level + 4) }.join(indent + operator + "\n")
  end

end


# -------------------------------------------------------
# Inclusive or expression function object
# -------------------------------------------------------

# Satisfied when at least one of the operands matches.
class InclusiveOrExpression < MultiOrExpression

  def is_match?(matched_ops)
    !matched_ops.empty?
  end

  def operator
    "or"
  end

  def criteria
    if ops.size == 2
      "either"
    else
      "any of"
    end
  end

end


# -------------------------------------------------------
# Exclusive or expression function object
# -------------------------------------------------------

# Satisfied when exactly one of the operands matches.
class ExclusiveOrExpression < MultiOrExpression

  def is_match?(matched_ops)
    matched_ops.length == 1
  end

  def operator
    "xor"
  end

  def criteria
    "only one of"
  end

end


# -------------------------------------------------------
# And expression function object
# -------------------------------------------------------

# Evaluates both expressions, and is satisfied when both have matched within
# the given number of lines of each other.
class AndExpression < CompoundExpression

  # dist:: maximum number of lines between the two matches; 0 means the same
  #        line, an infinite Float means anywhere in the same file
  def initialize(dist, op1, op2)
    @dist = dist
    super(op1, op2)
  end

  def mark_as_match(infile)
    infile.mark_as_match(start_position, end_position)
  end

  # Whether the most recent match of op is no more than @dist lines before
  # lnum.
  def match_within_distance(op, lnum)
    stack "op: #{op}; lnum: #{lnum}"
    op.matches.size > 0 && (lnum - op.matches[-1] <= @dist)
  end

  def inspect
    str = "("+ @ops[0].to_s
    if @dist == 0
      str += " same line as "
    elsif @dist.kind_of?(Float) && @dist.infinite?
      str += " same file as "
    else 
      str += " within " + @dist.to_s + " lines of "
    end
    str += @ops[1].to_s + ")"
    str
  end

  # Evaluates both operands against the line. For each operand that matched,
  # the other operand is checked for a match within the distance limit; the
  # earliest such match becomes the start of this match.
  def match?(line, lnum, file)
    matches = (0 ... @ops.length).select do |oi|
      @ops[oi].evaluate(line, lnum, file)
    end

    matches.each do |mi|
      oidx  = (1 + mi) % @ops.length
      other = @ops[oidx]
      if match_within_distance(other, lnum)
        # search for the maximum match within the distance limit
        other.matches.each do |m|
          if lnum - m <= @dist
            log { "match: #{m} within range #{@dist} of #{lnum}" }
            @last_start = m
            return true
          end
        end
        log { "other matches out of range" }
        return false
      end
    end

    return false
  end

  # The last line of the match: the greatest end position of the operands.
  def end_position
    @ops.collect { |op| op.end_position }.compact.max
  end

  def evaluate(line, lnum, file)
    if match?(line, lnum, file)
      @match_line_number = lnum
      # record the match, as the other expressions do, so that an enclosing
      # and-expression can find it through matches
      add_match(lnum)
      true
    else
      false
    end
  end

  # Describes the expression and its two operands, indented by level.
  def explain(level = 0)
    str = ""
    if @dist == 0
      str += " " * level + "on the same line:\n"
    elsif @dist.kind_of?(Float) && @dist.infinite?
      str += " " * level + "in the same file:\n"
    else 
      lnstr = @dist == 1 ? "line" : "lines"
      str += " " * level + "within #{@dist} #{lnstr} of each other:\n"
    end
    str += @ops[0].explain(level + 4)
    str += " " * level + "and\n"
    str += @ops[1].explain(level + 4)
    str
  end

end


# -------------------------------------------------------
# Expression function object creator
# -------------------------------------------------------

# Builds expression objects (regular expressions and the or/xor/and
# combinations of them) from command line arguments or from a file of
# patterns.
class ExpressionFactory
  include Loggable

  # signifies no limit to the distance between matches, i.e., anywhere within
  # the entire file is valid.
  INFINITE_DISTANCE = -1

  attr_reader :expr

  def initialize
    @regexps         = 0        # number of regular expressions created so far

    opts             = GlarkOptions.instance

    @ignorecase      = opts.nocase
    @wholewords      = opts.whole_words
    @wholelines      = opts.whole_lines
    @extended        = opts.extended
    @multiline       = opts.multiline
    @highlight       = opts.highlight
    @text_highlights = opts.text_highlights
    @extract_matches = opts.extract_matches
  end

  # reads a file containing one regular expression per line.
  # Returns an InclusiveOrExpression of the patterns, or nil when the file
  # has no non-blank line.
  def read_file(fname)
    log { "reading file: #{fname}" }
    expr = nil
    File.open(fname) do |file|
      file.each_line do |line|
        log { "line: #{line}" }
        line.strip!
        unless line.empty?
          # flatten the or expression instead of nesting it, to avoid
          # stack overruns for very large files.
          re = make_regular_expression(line)
          if expr 
            expr.ops << re
          else
            expr = InclusiveOrExpression.new(re)
          end
        end
      end
    end

    log { "returning expression #{expr}" }

    expr
  end

  # Creates a RegexpFuncObj for the pattern, applying the options this
  # factory was created with.
  def make_regular_expression(pattern, negated = false)
    # this check is because they may have omitted the pattern, e.g.:
    #   % glark *.cpp
    if File.exist?(pattern)
      warn "pattern '#{pattern}' exists as a file.\n    Pattern may have been omitted."
    end

    regex = Regexp.create(pattern.dup, 
                          :negated    => negated, 
                          :ignorecase => @ignorecase,
                          :wholewords => @wholewords,
                          :wholelines => @wholelines,
                          :extended   => @extended,
                          :multiline  => @multiline)

    regex_args = {
      :highlight       => @highlight,
      :text_highlights => @text_highlights,
      :extract_matches => @extract_matches
    }    

    re = RegexpFuncObj.new(regex, @regexps, regex_args)
    @regexps += 1
    re
  end

  # creates two expressions and returns them.
  def make_expressions(args)
    a1 = make_expression(args)
    a2 = make_expression(args)

    [ a1, a2 ]
  end

  # removes optional end tag
  def shift_end_tag(name, args)
    # explicit end tag is optional:
    args.shift if args[0] == ("--end-of-" + name)
  end

  # Creates a negated regular expression from the next argument.
  def make_not_expression(args)
    pattern = args.shift
    unless pattern
      error "'not' expression takes one argument"
      exit 2
    end

    expr = make_regular_expression(pattern, true)

    # explicit end tag is optional:
    shift_end_tag("not", args)
    expr
  end

  # Creates the two operands of a binary expression, exiting with status 2
  # when either is missing.
  def make_two_expressions(args, type)
    a1, a2 = make_expressions(args)
    unless a1 && a2
      error "'" + type + "' expression takes two arguments"
      exit 2
    end

    shift_end_tag(type, args)
    [ a1, a2 ]
  end

  def make_or_expression(args)
    a1, a2 = make_two_expressions(args, "or")
    InclusiveOrExpression.new(a1, a2)
  end

  def make_xor_expression(args)
    a1, a2 = make_two_expressions(args, "xor")
    ExclusiveOrExpression.new(a1, a2)
  end

  # Whether x is usable as a distance: an integer, or a value that converts
  # to INFINITE_DISTANCE or for which num returns a true value.
  def numeric?(x)
    x && (x.kind_of?(Integer) || (x.to_i == INFINITE_DISTANCE || x.num))
  end  

  # Determines the distance of an and-expression from the option itself
  # ("--and=N") or from the following argument. Exits with status 2 when the
  # distance is not numeric.
  def make_and_distance(arg, args)
    dist = nil
    if arg == "-a"
      dist = args.shift
    elsif arg == "--and"
      if args.size > 0 && numeric?(args[0])
        dist = args.shift
      else
        dist = "0"
      end
    elsif arg.index(/^--and=(\-?\d+)?$/)
      dist = $1
    end

    # check to ensure that this is numeric
    if !numeric?(dist)
      error "invalid distance for 'and' expression: '#{dist}'\n" +
        "    expecting an integer, or #{INFINITE_DISTANCE} for 'infinite'" 
      exit 2
    end

    if dist.to_i == INFINITE_DISTANCE
      dist = 1.0 / 0.0            # infinity
    else
      dist = dist.to_i
    end

    dist
  end

  def make_and_expression(arg, args)
    dist = make_and_distance(arg, args)

    a1, a2 = make_two_expressions(args, "and")
    AndExpression.new(dist, a1, a2)
  end

  # Parses an infix expression, such as ( a --or b ), starting at arg and
  # consuming the following arguments.
  def make_infix_expression(arg, args = [])
    expr = nil

    while arg
      case arg
      when '('
        arg  = args.shift
        expr = make_infix_expression(arg, args)
      when '--or', '-o'
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = InclusiveOrExpression.new(expr, rhs)
      when '--xor'
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = ExclusiveOrExpression.new(expr, rhs)
      when Regexp.new('^--and'), '-a'
        dist = make_and_distance(arg, args)
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = AndExpression.new(dist, expr, rhs)
      when ')'
        break
      else
        # blather "assuming the last argument #{arg} is a pattern"
        expr = make_regular_expression(arg)
        break
      end
      arg = args.shift
    end

    if !expr
      puts "arg: #{arg}; args: #{args.inspect}"
      error "No expression provided."
    end

    expr
  end

  # Creates the expression that starts at the head of args, consuming the
  # arguments it uses. Returns nil when args is empty. With warn_option, an
  # argument that looks like an option ends the program with status 2.
  def make_expression(args, warn_option = false)
    arg = args[0]

    if arg
      case arg
      when "--or", "-o"
        args.shift
        make_or_expression(args)
      when "--xor"
        args.shift
        make_xor_expression(args)
      when %r{^\-\-and}, %r{^\-a}
        args.shift
        make_and_expression(arg, args)
      when '('
        args.shift
        make_infix_expression(arg, args)
      else
        if warn_option && arg.index(/^\-{1,2}\w/)
          warn "option not understood: #{arg}"
          exit 2
        end

        # blather "assuming the last argument #{arg} is a pattern"
        args.shift
        make_regular_expression(arg)
      end
    else
      nil
    end
  end

end


# Program entry point: loads the options from ARGV, searches each named file
# (or standard input when none is given) and exits with the search status.
# Any error is reported on standard error and ends the program with status 2;
# in verbose mode the stack trace is shown as well and the error is re-raised.
def main
  begin
    Log.set_widths(-15, 5, -40, -40)

    Log.log { "loading options" }
    opts = GlarkOptions.instance
    opts.run(ARGV)
    Log.log { "done loading options" }

    # To get rid of the annoying stack trace on ctrl-C:
    trap("INT") { abort }

    if opts.explain
      puts opts.expr.explain
    end

    files = if ARGV.size > 0 then
              if opts.split_as_path
                ARGV.collect { |f| f.split(File::PATH_SEPARATOR) }.flatten
              else
                ARGV
              end
            else 
              [ '-' ]
            end

    glark = Glark.new(opts.expr, files)

    files.each do |f|
      glark.search(f) 
    end

    exit glark.exit_status
  rescue => e
    # show the message, and the stack trace only if verbose:
    $stderr.puts "error: #{e}"
    # Log.verbose is consulted rather than opts, which is unset when loading
    # the options is what failed
    if Log.verbose
      $stderr.puts e.backtrace
      raise
    else
      exit 2
    end
  end
end

# Run main only when this file is executed directly, not when it is loaded.
main if __FILE__ == $0
