#!/usr/bin/perl -w
# NAME: osmcut2.pl
# AIM: Read planet.osm, and extract info...
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat; # get file info like if ($sb = stat($fil)) { $date = $sb->mtime; and $size = $sb->size; }
use Time::HiRes qw(gettimeofday tv_interval);       # provide more accurate timings
use Cwd;
# Platform-dependent locations of the helper library and of the log file.
# The defaults are for Unix-like systems; Windows-style platforms override them.
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# Anchor the test on the Windows-style platform names. An unanchored /win/i
# also matches 'darwin' (macOS), which would select the C:\ paths and the
# backslash separator on a Unix-like system.
if ($os =~ /^(?:MSWin|cygwin)/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
# Make the helper library findable before loading it.
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
# NOTE(review): $LF is not referenced in this file; presumably it is the log
# handle shared with lib_utils.pl - confirm.
our ($LF);
# Reduce $0 to its last path component, accepting either separator style.
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
# The log is written to <temp_dir>/temp.<program name>.txt
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
# Version string printed by give_help().
my $VERS = "0.0.1 2012-01-06";
# Passed to close_log() on exit; set by the -l option, and also when the
# default input file is used.
my $load_log = 0;
# Input file path; filled in by parse_args().
my $in_file = '';
# Current verbosity level, tested through VERB1()..VERB9().
my $verbosity = 0;
# When true and no input file is given, parse_args() falls back to $def_file.
my $debug_on = 1;
my $def_file = 'D:\SAVES\OSM\planet-120229.osm';
# Output file name set by the -o option; nothing in this file reads it yet.
my $out_xml = '';
# NOTE(review): presumably a display-width limit for echoed lines; the
# truncation code that would use it is currently commented out.
my $max_line = 90;

### program variables
# Messages collected by prtw() and reported by show_warnings().
my @warnings = ();
# Working directory at start-up; not referenced elsewhere in this file.
my $cwd = cwd();

# Verbosity predicates: each one is true when the current $verbosity has
# reached the level given in its name.
sub VERB1() {
    return (1 <= $verbosity);
}
sub VERB2() {
    return (2 <= $verbosity);
}
sub VERB5() {
    return (5 <= $verbosity);
}
sub VERB9() {
    return (9 <= $verbosity);
}

# Report every message collected in @warnings through prt().
# When there are none, a "No warnings" note is printed only at verbosity 9.
#
# Parameters:
#   $val - the exit value handed over by the caller; it is not used here.
sub show_warnings($) {
    my ($val) = @_;
    unless (@warnings) {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
        return;
    }
    my $count = scalar @warnings;
    prt( "\nGot ".$count." WARNINGS...\n" );
    for my $warning (@warnings) {
        prt($warning."\n");
    }
    prt("\n");
}

# Common exit path: print an optional final message, list the collected
# warnings, close the log and terminate the program.
#
# Parameters:
#   $val - process exit status.
#   $msg - final message; an empty string prints nothing. A trailing newline
#          is added when the message does not already end with one.
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        my $text = ($msg =~ /\n$/) ? $msg : $msg."\n";
        prt($text);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


# Print a warning now and remember it for the end-of-run summary.
#
# Parameters:
#   $tx - warning text; one trailing newline, if present, is removed before
#         the text is printed (with a newline) and stored in @warnings.
sub prtw($) {
    my ($tx) = @_;
    $tx =~ s/\n$//;
    prt($tx."\n");
    push(@warnings, $tx);
}

# Return a reference to a new, empty hash.
#
# Parameters:
#   $tag - accepted for the caller's convenience; currently ignored.
sub get_hash_ref($) {
    my ($tag) = @_;
    return {};
}

# Scan an XML file character by character, log the first occurrence of each
# distinct element name, and print a progress line every 30000 input lines
# (and on every line that introduced a new element name).
#
# Parameters:
#   $fil - path of the file to read.
#
# Exits through pgm_exit(1, ...) when the file cannot be stat'ed or opened.
# Relies on helpers from lib_utils.pl (prt, get_nn, lu_get_YYYYMMDD_hhmmss,
# trim_all, space_split, secs_HHMMSS) whose exact behaviour is not visible
# in this file.
sub process_in_file($) {
    my ($fil) = @_;
    my $sb = stat($fil);
    if (!$sb) {
        pgm_exit(1,"ERROR: Unable to stat file [$fil]\n");
    }
    my $date = $sb->mtime;
    my $size = $sb->size;
    my $INF;
    # Three-argument open, so characters in the file name can never be
    # interpreted as part of the open mode.
    if (! open $INF, '<', $fil) {
        pgm_exit(1,"ERROR: Unable to open file [$fil]\n");
    }

    my $lnn = 0;            # number of lines read so far
    my $in_tag = 0;         # true between '<' and the matching '>'
    my $in_quote = 0;       # true inside a quoted attribute value
    my $quote_ch = '';      # the quote character that opened the value
    my $tag = '';           # text of the tag being collected (without < >)
    my $txt = '';           # text seen since the previous tag ended
    my %shown = ();         # element names already reported
    my $show = 0;           # set when the current line reported a new name

    prt("Processing file $fil, ".get_nn($size)." bytes, ".lu_get_YYYYMMDD_hhmmss($date)."\n");
    my $bgn = [ gettimeofday ];
    while (my $line = <$INF>) {
        $lnn++;
        # Ask the handle for the real byte offset instead of estimating it
        # from line lengths, which drifts by one byte per line wherever no
        # carriage return is stripped.
        my $file_off = tell($INF);
        chomp $line;
        $line = trim_all($line);
        my $len = length($line);
        for (my $i = 0; $i < $len; $i++) {
            my $ch = substr($line,$i,1);

            if (!$in_tag) {
                if ($ch eq '<') {
                    $tag = '';
                    $in_tag = 1;
                    $in_quote = 0;
                } elsif ($ch !~ /\s/ || length($txt)) {
                    # Leading whitespace is never stored in the text.
                    $txt .= $ch;
                }
                next;
            }

            if ($in_quote) {
                # Inside an attribute value everything, including '>', is
                # part of the tag until the matching quote arrives.
                $tag .= $ch;
                $in_quote = 0 if ($ch eq $quote_ch);
                next;
            }

            if ($ch ne '>') {
                # Quotes open an attribute value, except in '<!...'
                # constructs (comments, DOCTYPE) where they are plain text.
                if (($ch eq '"' || $ch eq "'") && $tag !~ /^!/) {
                    $in_quote = 1;
                    $quote_ch = $ch;
                }
                $tag .= $ch;
                next;
            }

            # '>' outside quotes: the tag is complete.
            $in_tag = 0;
            # Closing tags and the very frequent changeset / tag elements
            # are never reported.
            if ($tag !~ m{^/} && $tag !~ /^changeset\s+/ && $tag !~ /^tag\s/) {
                my @arr = space_split($tag);
                my $tag1 = $arr[0];
                # An empty tag yields no first word; skip it rather than
                # matching against undef.
                if (defined $tag1 && $tag1 !~ m{^/} && !defined $shown{$tag1}) {
                    $shown{$tag1} = 1;
                    prt("$lnn: [$txt] <$tag1\n");
                    $show = 1;
                }
            }
            $txt = '';
        }

        if ($show || (($lnn % 30000) == 0)) {
            # Percentage of the file consumed, rounded to one decimal place.
            my $pctd = ($size > 0 && $file_off > 0) ? (($file_off / $size) * 100) : 0;
            my $pct = int(($pctd+0.05) * 10) / 10;
            # Projected total run time, extrapolated from the time used so far.
            my $eta = '';
            if ($pctd > 0) {
                my $elap = tv_interval( $bgn, [ gettimeofday ] );
                $eta = secs_HHMMSS(int($elap * (100 / $pctd)+0.5));
            }
            prt("$lnn:$pct: $line $eta\n");
            $show = 0;
        }
    }
    close $INF;
    prt("Read $lnn lines...\n");
}

#########################################
### MAIN ###
# Program flow: parse the command line (which sets $in_file and the option
# variables), scan the chosen input file, then leave through the common exit
# path with status 0 and no extra message.
parse_args(@ARGV);
process_in_file($in_file);
pgm_exit(0,"");
########################################
# Print the usage summary, one prt() call per line, including the program
# name, version, current verbosity default and the log file path.
sub give_help {
    my @help_lines = (
        "$pgmname: version $VERS\n",
        "Usage: $pgmname [options] in-file\n",
        "Options:\n",
        " --help  (-h or -?) = This help, and exit 0.\n",
        " --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n",
        " --load        (-l) = Load LOG at end. ($outfile)\n",
        " --out <file>  (-o) = Write output to this file.\n",
    );
    foreach my $help_line (@help_lines) {
        prt($help_line);
    }
}

# Verify that an option is followed by at least one more argument.
#
# Parameters:
#   $arg - the option being processed (used in the error message).
#   rest - the arguments that follow it on the command line.
# Exits through pgm_exit(1, ...) when nothing follows the option.
sub need_arg {
    my $arg = shift;
    my $following = scalar @_;
    if ($following == 0) {
        pgm_exit(1,"ERROR: [$arg] must have a following argument!\n");
    }
}

# Parse the command line and set the file-level option variables.
#
# All leading dashes are stripped from an option, so -x and --x are the same.
# Options are recognised by their first letter:
#   h... or ?   : print the help text and exit 0
#   v...        : verbosity; trailing digits set the level (-v10, --verb2),
#                 otherwise every leading 'v' bumps it by one (-vvv)
#   l...        : set $load_log
#   o... <file> : store the following argument in $out_xml
# An argument that does not start with '-' becomes the input file; when
# several are given the last one wins.
#
# Parameters:
#   the argument list, normally @ARGV.
#
# Exits through pgm_exit(1, ...) on an unknown option, on a missing option
# value, and when no input file is given or the given one does not exist.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            ($sarg = $arg) =~ s/^-+//;
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # The prefix match must be non-greedy: a greedy '.*' leaves
                # only the final digit for the capture, so -v10 would set
                # the level to 0 instead of 10.
                if ($sarg =~ /^v.*?(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
                prt("Set to load log at end.\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                # @av still starts with the option itself, so need_arg()
                # sees the option followed by whatever remains.
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_xml = $sarg;
                prt("Set out file to [$out_xml].\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if (VERB1());
        }
        shift @av;
    }

    # Debug convenience: without an input file fall back to the built-in
    # default and load the log at the end.
    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
        prt("Set DEFAULT input to [$in_file]\n");
        $load_log = 1;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# eof - osmcut2.pl
