#!/usr/bin/env ruby

require 'zlib'

# Exactly two command-line arguments are required: a directory glob pattern
# and a Ruby expression describing the percentages to generate.
unless ARGV.size == 2
  STDERR.puts 'Usage: generate_nulls.rb <dir> <br>'
  exit 1
end

# Glob pattern (expanded with Dir[] further down) selecting the directories
# to process.
DIR      = ARGV.first
# NOTE(review): the second argument is passed to eval, so it is executed as
# arbitrary Ruby code. That is what allows values such as "1..10" or
# "[1, 5, 10]", but it must only ever be fed trusted input.
BR_RANGE = eval(ARGV.last)

# Maps each table name to the zero-based indices of the columns that form its
# primary key. add_nulls never replaces these columns. The hash and its index
# arrays are frozen so the schema cannot be modified by accident at runtime.
SCHEMA_PK = {
  'part' => [0],        # partkey
  'region' => [0],      # regionkey
  'nation' => [0],      # nationkey
  'supplier' => [0],    # suppkey
  'partsupp' => [0, 1], # partkey, suppkey
  'customer' => [0],    # custkey
  'orders' => [0],      # orderkey
  'lineitem' => [0, 3]  # orderkey, linenumber
}.each_value(&:freeze).freeze

# Writes, for every table listed in SCHEMA_PK, a copy of
# "<dir>/<table>-br000.tbl.gz" to "<dir>/<table>-brNNN.tbl.gz" (NNN is +br+
# formatted with '%03i') in which each non-primary-key field is replaced by
# the marker '@' with probability br * 0.01.
#
# The random generator is seeded with 1 on every call, so a given input and
# +br+ always yield the same output.
#
# dir - directory containing the "-br000" input files.
# br  - replacement rate in percent (e.g. 5 means each field has a 5% chance).
#
# Raises ArgumentError when +br+ formats to "000": the output path would then
# be the input path, and opening it for writing would truncate the source
# data before it could be read.
def add_nulls(dir, br)
  out_tag = '%03i' % br
  if out_tag == '000'
    raise ArgumentError,
          "br=#{br.inspect} maps to the br000 input files; refusing to overwrite them"
  end

  rg = Random.new(1)
  null_prob = br * 0.01
  SCHEMA_PK.each do |table, pkeys|
    STDERR.puts "  Table #{table}"
    in_path  = "#{dir}/#{table}-br000.tbl.gz"
    out_path = "#{dir}/#{table}-br#{out_tag}.tbl.gz"
    # Open the input first so that a missing input file does not leave an
    # empty output file behind.
    Zlib::GzipReader.open(in_path) do |in_file|
      Zlib::GzipWriter.open(out_path) do |out_file|
        in_file.each_line do |line|
          text = line.chomp
          # A negative limit keeps trailing empty fields, so a line that ends
          # with the '|' delimiter round-trips unchanged through join.
          row = text.split('|', -1)
          # When the line ends with '|', the last element is only an artifact
          # of that trailing delimiter, not a column: never replace it.
          data_cols = text.end_with?('|') ? row.size - 1 : row.size
          data_cols.times do |col|
            next if pkeys.include?(col)

            row[col] = '@' if rg.rand < null_prob
          end
          out_file.puts row.join('|')
        end
      end
    end
  end
end

# Expand the DIR glob pattern and, for each match in sorted order, generate
# one set of output files per value in BR_RANGE.
Dir[DIR].sort.each do |dir|
  BR_RANGE.each do |br|
    STDERR.puts "Processing #{dir} BR=#{br}"
    add_nulls(dir, br)
  end
end
