#!/Perl
# Series: Automation of Microsoft Word
# original from : http://www.stouk.com/documents/perl/gui/guiref/page04.htm
# Purpose: Load an existing document, and write it out to a TEXT ONLY file.
# I had some initial trouble getting this to function, hence the addition of
# som eLOG file code, so I could review what was happening ... and there
# is a lot of DEAD CODE left commented out, to remind me of the tries ...
# author: geoff mclane - mailto: geoffair@hotmail.com - 2006-06-12
# 22/09/2007 - added command line argument parse -in infile -out outfile
# use strict; 
use Win32::OLE; 
use Win32::OLE::Const 'Microsoft Word'; 

my ($LOG);
my $write_log = 0;
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
	my @tmpsp = split(/\\/,$pgmname);
	$pgmname = $tmpsp[-1];
}
my $outfile = "temp.".$pgmname.".txt";
if ( open( $LOG, ">$outfile" ) ) {
    $write_log = 1;
} else {
    $write_log = 0;
    prt( "WARNING: Unable to open $outfile LOG ...\n" );
}

##################################################################################
# *** ALTER THIS TO POINT TO YOUR OWN TEST DOCUMENT, AND TEST TEXT FILE RESULT ***
#my $infile = 'c:\tmp\test.doc';
#my $file_out = 'c:\tmp\test.txt';
my $infile = 'c:\tmp\test3.doc';
my $file_out = 'c:\tmp\test3.txt';
my $do_clean = 1;
# OR ACCEPT COMMAND LINE INPUT
parse_args(@ARGV);
##################################################################################
my $dbg1 = 0;

### can be all in one line
### my $Word = Win32::OLE->GetActiveObject('Word.Application') || Win32::OLE->new('Word.Application', 'Quit'); 
### or tested, like ...
my $Word = Win32::OLE->GetActiveObject('Word.Application');
if ($Word) {
	prt( "Using running application found ... \n" );
} else {
	prt( "No running application found ... starting new ...\n" );
	$Word = Win32::OLE->new('Word.Application', 'Quit');
	if ($Word) {
		prt("Loaded Word application ...\n");
	} else {
		mydie("YEEK! Unable to load word ... aborting \n" );
	}
}

###prt( "Setting word as visible ...\n" );
###$Word->{'Visible'} = 1; # if you want to see what's going on, but it is all quite quick ;=))

prt( "Loading Word application with $infile...\n" );
#my $doc = $Word->Documents->Open({ 
# FileName => $infile, 
# ConfirmConversions => 0, 
# ReadOnly => 1, 
# AddToRecentFiles => 0, 
# Format => wdOpenFormatAuto}); 
# OR SIMPLY ...
my $doc = $Word->Documents->Open($infile) || mydie("Unable to open document " . Win32::OLE->LastError()); 

# appears these are not needed ...
#print "Setting the file name to $file_out ...\n";
#$Word->Documents->FileName($file_out); 
#$Word->Documents->FileFormat(1); 

$Word->Documents->AddToRecentFiles(0);

if ( -f $file_out ) {
	print "Deleting previous output $file_out ...\n";
	unlink $file_out; # remove any previous
}

# not sure this is needed, or does anything ...
###$Word->DisplayAlerts = wdAlertsNone; - no, should be
$Word->{'DisplayAlerts'} = wdAlertsNone;

# lots of tries that FAILED
###$Word->Documents->SaveAs( { 'FileName->'."$file_out",}); 
###my $cmd = "FileName -> $file_out";
###$Word->Documents->SaveAs( { $cmd } ); 
###$Word->Documents->SaveAs( "$file_out" ); 
# $Word->Documents->SaveAs( FileName => "$file_out" ); 
# $Word->Documents->SaveAs({
# $Word->SaveAs({
# $Word->ActiveDocument->SaveAs({
# and then it WORKED with ...
my $aDoc = $Word->ActiveDocument;
if ($aDoc) {
	prt( "Doing SaveAs on ActiveDocument ...to $file_out ...\n" );
	$aDoc->SaveAs({
		FileName =>  "$file_out",
		FileFormat =>  wdFormatDOSTextLineBreaks });
	if ( -f $file_out) {
		prt("Appears new file created ...\n");
		$Word->Documents->Close; 
		$Word->Documents->Quit; 
		$Word->Quit; # close it all ...
		if ($do_clean) {
			if (open FIL, "<$file_out") {
				my @lines = <FIL>;
				my @nlines = ();
				close FIL;
				prt( "Got ".scalar @lines." to process ...\n" );
				my $had = 0;
				foreach my $line (@lines) {
					my $ln = trim_all($line);
					prt( "$ln\n" ) if ($dbg1);
					if (($ln eq '>')||(length($ln) == 0)) {
						if ($had) {
							prt( "Had HAD ... skipped ...\n" ) if ($dbg1);
							next;
						}
						prt( "Set HAD ... skip next ...\n" ) if ($dbg1);
						$had = 1;
					} else {
						prt( "Clear HAD ... no skip ...\n" ) if ($dbg1);
						$had = 0;
					}
					push(@nlines, $line);
				}
				write_a_file( $file_out, @nlines );
			} else {
				prt("Warning: unable to open file $file_out ...\n");
			}
		}
	} else {
		prt("Warning: text file not created ...\n");
		$Word->Documents->Close; 
		$Word->Documents->Quit; 
		$Word->Quit; # close it all ...
	}
} else {
	prt("ERROR: Failed to get ActiveDocument ...\n");
	$Word->Documents->Close; 
	$Word->Documents->Quit; 
	$Word->Quit; # close it all ...
}

##$Word->Documents->Close; 
##$Word->Documents->Quit; 
##$Word->Quit; # close it all ...
close $LOG;
### system($outfile);

# ENUMERATION OF OUTPUT FORMATS
# wdFormatDocument - Microsoft Word format. 
# wdFormatDOSText - Microsoft DOS text format. 
# wdFormatDOSTextLineBreaks - Microsoft DOS text with line breaks preserved. 
# wdFormatEncodedText - Encoded text format. 
# wdFormatFilteredHTML - Filtered HTML format. 
# wdFormatHTML - Standard HTML format. 
# wdFormatRTF - Rich text format (RTF). 
# wdFormatTemplate - Microsoft Word template format. 
# wdFormatText - Microsoft Windows text format. 
# wdFormatTextLineBreaks - Microsoft Windows text format with line breaks preserved. 
# wdFormatUnicodeText - Unicode text format. 
# wdFormatWebArchive - Web archive format. 
# wdFormatXML - Extensible Markup Language (XML) format. 

################################
### output and log file
sub wlog {
	my $ml = shift;
	print $LOG $ml;
}

sub prt {
	my $m = shift;
	if ($write_log) {
		wlog($m);
	}
	print $m;
}

sub mydie {
	my $msg = shift;
	if ($write_log) {
		wlog($msg);
	}
	die $msg;
}

##############################
# argument parsing

# Ensure argument exists, or die.
sub require_arg {
    my ($arg, @arglist) = @_;
    mydie( "ERROR: no argument given for option '$arg' ...\n" ) if ! @arglist;
}

sub give_help {
	prt( "$pgmname [-in filepath] [-out filename] [-clean]\n" );
	prt( "Default in = $infile, out = $file_out ...\n" );
	mydie( "And of course you MUST have MS Word installed ...\n" );
}

sub parse_args {
	my (@av) = @_;
	while (@av) {
		my $arg = $av[0];
		if (($arg eq '?')||
			($arg eq '-?')||
			($arg eq '-h')||
			($arg eq -'H')) {
			give_help();
		} elsif ($arg eq '-clean') {
			$do_clean = 1;
			prt( "Do clean set...\n" );
		} elsif ($arg eq '-in') {
			require_arg(@av);
			shift @av;
			$arg = $av[0];
			$infile = $arg;
			prt( "In file set to [$infile]\n" );
		} elsif ($arg eq '-out') {
			require_arg(@av);
			shift @av;
			$arg = $av[0];
			$file_out = $arg;
			prt( "Out file set to [$file_out]\n" );
		} else {
			prt( "ERROR: Unknown command [$arg] ...\n" );
			give_help();
		}
		shift @av;
	}
}

#####################
### file action
sub write_a_file {
	my ($fil, @txt) = @_;
	open WOF, ">$fil" or mydie("ERROR: Unable to open $fil! $!\n");
	print WOF @txt;
	close WOF;
}

sub trim_all {
	my ($ln) = shift;
	chomp $ln;			# remove CR (\n)
	$ln =~ s/\r$//;		# remove LF (\r)
	$ln =~ s/\t/ /g;	# TAB(s) to a SPACE
	while ($ln =~ /\s\s/) {
		$ln =~ s/\s\s/ /g;	# all double space to SINGLE
	}
	while ($ln =~ /^\s/) {
		$ln = substr($ln,1); # remove all LEADING space
	}
	while ($ln =~ /\s$/) {
		$ln = substr($ln,0, length($ln) - 1); # remove all TRAILING space
	}
	return $ln;
}

# eof - autoword02.pl
