#!/usr/bin/perl -w

# sitemap.pl -- a command-line utility for building an XHTML site map
#               using XML::XPath. Presumes that the documents being 
#		searched are well-formed XML or XHTML. 

# Usage:	perl --localpath=/file/path/to/serverroot/ 
#                     --hostname=http://my.server.tld
#                     --exts="xhtml, html" sitemap.p 
#

use strict;
use XML::XPath;
use XML::XPath::XMLParser;
use XML::Parser;
use File::Find;
use Getopt::Long;


my (%file_list, $local_path, $host_name, $file_exts, $out_file);

# parse the command line options:

##GetOptions( "localpath=s" => \$local_path,
##            "hostname=s"  => \$host_name,
##            "exts=s"      => \$file_exts);
$local_path = shift || '/homepage/ex1';
$host_name = shift || 'http://localhost';
$file_exts = shift || 'htm, html, xhtml';

die "Usage: perl --localpath=/file/path/to/root/ --hostname=http://my.server --exts='xhtml, html' sitemap.pl\n" 
     unless $local_path and $host_name and $file_exts;

$host_name =~ s/\/$//;  
$local_path =~ s/\/$//;  
my @file_exts = split (/\s?,\s?/, $file_exts);
my $ep = XML::Parser->new(Namespaces => 1); 
my $xp = XML::XPath->new;

# create the root elements for the output tree
my $root = XML::XPath::Node::Element->new('html');
my $body = XML::XPath::Node::Element->new('body');

%file_list = &find_local_files(\@file_exts, [$local_path]);

##
# now, to the hunt...
#


DIR: foreach my $directory ( sort (keys (%file_list))) {

    my $directory_container = XML::XPath::Node::Element->new('div');
    my $translated_directory = &translate_path($local_path, '', $directory);
    $translated_directory ||= '/';
    $directory_container->appendAttribute( XML::XPath::Node::Attribute->new('class', 'directory') );
    $directory_container->appendAttribute( XML::XPath::Node::Attribute->new('ID', $translated_directory) );

    my $directory_header = XML::XPath::Node::Element->new('h2');
    $directory_header->appendChild( XML::XPath::Node::Text->new($translated_directory) );
    $directory_container->appendChild($directory_header);

    my $file_list = XML::XPath::Node::Element->new('ul');
    FILE: foreach my $file (sort (@{$file_list{$directory}})) {

        my ($root_node, $parser);
        my $list_item = XML::XPath::Node::Element->new('li');
        $xp->set_filename($file);
        
        eval {
        $parser = XML::XPath::XMLParser->new(
                 filename => $xp->get_filename,
                 parser => $ep);

        $root_node = $parser->parse;        
        $xp->find('/*', $root_node);
        };
        if ($@) {
            $list_item->appendChild( XML::XPath::Node::Text->new("ERROR parsing file '$file': " . $@) );
            $file_list->appendChild($list_item);
            next FILE;
        }
        my $translated_uri = &translate_path($local_path, $host_name, $file);

        my $file_link = XML::XPath::Node::Element->new('a');
        $file_link->appendAttribute( XML::XPath::Node::Attribute->new('href' , $translated_uri) );
        my $title = $xp->find('/html/head/title', $root_node)->string_value || "Untitled Document [ $translated_uri ]";

        $file_link->appendChild( XML::XPath::Node::Text->new($title) );

        my $desc = $xp->find(q{/html/head/meta[@content][@name="description"]}, $root_node)->string_value 
                 || 'No Description';
            
        
        # add the description and keywords to as elements make a seachable index.
        my $desc_node = XML::XPath::Node::Element->new('description');
        $desc_node->appendChild( XML::XPath::Node::Text->new(" $desc ") );

        my $keywords_text = $xp->find(q{/html/head/meta[@content][@name="keywords"]}, $root_node)->string_value
                 || '';
        my $keywords_node = XML::XPath::Node::Element->new('keywords');
        $keywords_node->appendChild( XML::XPath::Node::Text->new($keywords_text) );

        # add the child nodes to the 'li' element
        $list_item->appendChild($file_link);
        $list_item->appendChild($keywords_node);
        $list_item->appendChild($desc_node );
   
        # add the anchor to the output
        $file_list->appendChild($list_item);
    }

    # add the list to the direcory container, and 
    # the container to the main body,
    $directory_container->appendChild($file_list);
    $body->appendChild($directory_container);
}

$root->appendChild($body);
print $root->toString;
print "\n";

# utility subroutines

sub find_local_files {
    my ($extensions, $directories) = @_;
    my %file_list = ();   
    my $extension_re = '(';
    $extension_re .= join ('|', @{$extensions});
    $extension_re .= ')';
        
    local *wanted_files = sub {
                              return if -d;
                              return if -l;
                              push (@{$file_list{$File::Find::dir}}, $File::Find::name)
                              if $File::Find::name =~ /\.$extension_re$/;
                          };
        
    File::Find::find(\&wanted_files, @{$directories});
    return %file_list;
}

sub translate_path {     
    my ($old_path, $new_path, $file) = @_;
    $file =~ s|$old_path|$new_path|;
    return $file;             
}
