// urlhistory.cpp : Defines the entry point for the console application.
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "stdafx.h"
#include <atlbase.h>
#include <shlobj.h>
#include <UrlHist.h>
/* this gives the structure ---
typedef struct _STATURL
    {
    DWORD cbSize;
    LPWSTR pwcsUrl;
    LPWSTR pwcsTitle;
    FILETIME ftLastVisited;
    FILETIME ftLastUpdated;
    FILETIME ftExpires;
    DWORD dwFlags;
    } 	STATURL;

   The FILETIME structure is a 64-bit value representing
   the number of 100-nanosecond intervals since January 1, 1601 (UTC).
   typedef struct _FILETIME {
      DWORD dwLowDateTime;
      DWORD dwHighDateTime;
   } FILETIME, *PFILETIME;

   ============================= */

// VFH(a): true when 'a' is a usable handle, i.e. neither NULL nor INVALID_HANDLE_VALUE.
#define  VFH(a)   ( (a) && ( (a) != INVALID_HANDLE_VALUE ) )
// CHKMEM(a): print an error and exit(1) when the allocation result 'a' is NULL/zero.
// The argument is parenthesized so expressions such as (p + n) are tested as a whole,
// and the do/while(0) wrapper makes the macro a single statement (safe in if/else).
#define  CHKMEM(a)   do { if(!(a)) { printf("ERROR: MEMORY FAILED! Aborting\n"); exit(1); } } while(0)
// EndBuf(a): pointer to the terminating NUL of string 'a' (the append position).
#define  EndBuf(a)   ( (a) + strlen(a) )

// Handles -----------------------------------------------------------------
HANDLE   hStdOut = NULL;   // console output handle, fetched on first use by wso_line()
HANDLE   outfile = NULL;   // output file handle: NULL = not yet created,
                           // INVALID_HANDLE_VALUE = create/write failed
// String constants. They point at string literals, so they are const, and
// _T() keeps the literal width in step with _TCHAR in both UNICODE and ANSI builds.
const _TCHAR * file_name = _T("tempfil.txt");   // name of the -o output file
const _TCHAR * end_of_line = _T("\r\n");        // line ending used by write2file()
const char * eol = "\r\n";                      // line ending used by write2filec()
// Statistics reported by _tmain() in show-all mode
size_t url_length = 0;        // longest URL seen by show_url()
size_t max_line_length = 0;   // longest output line built by show_url()
// Command line switches (set by process_commands) and counters
int   add_number = 0;      // -n : prefix each printed URL with its count
int   out_file = 0;        // -o : also write output to file_name
int   add_files = 0;       // -f : keep URLs starting with "file"
int   url_count = 0;       // number of history entries passed to show_url()
int   clean_url = 0;       // -c : reduce URLs to protocol + domain and list them once
int   add_javascript = 0;  // -j : keep URLs starting with "javascript"
int   add_ms_help = 0;     // -m : keep ms-help, hcp:, ms-its:, mk: and res: URLs
int   remove_www = 0;      // -w : strip a leading "www." in clean mode
int   show_all = 0;        // -a : print every entry in full, with title and dates
int   sort_by_date = 0;    // -s : order the cleaned list by last-visited time
/* Some long URLs, e.g. from search engines like Yahoo!, contain a second,
   embedded URL, like
   [http://rds.yahoo.com/_ylt=A0geu.qJ6x5JV2oBlTJXNyoA/SIG=16s96uj9n/
   EXP=1226849545/**http%3a//search.yahoo.com/search%3fei=UTF-8%26rp=
   Thailand%2bchayapum%2btelephone%26sado=1%26rd=r1%26meta=vc%253Dfr%26fr=
   yfp-t-501%26fp_ip=FR%26p=Thailand%2bchaiyaphum%2btelephone%2b%2522area%2bcode%2522]
   LastVisit=[11/15/2008 15:31] Expires=[12/11/2008 15:31]
   ================================================================== */
// One entry of the escape table: a three character "%xx" sequence and the
// single character it stands for. 'stg' points at a string literal, so it is
// declared const (binding a literal to a plain char* is not valid modern C++).
typedef struct tagUSTG2CHR {
   const char * stg;   // escape text, e.g. "%3a"; NULL marks the end of the table
   char  chr;          // replacement character
}USTG2CHR, * PUSTG2CHR;

// Escapes that got_sub() knows how to replace. Lookups are case-insensitive,
// so only the lower-case spelling is listed. Terminated by a { 0, 0 } entry.
USTG2CHR ustg2chr[] = {
   { "%3a", ':' },
   { "%3f", '?' },
   { "%26", '&' },
   { "%2b", '+' },
   { "%25", '%' },
   { "%22", '"' },
   { "%2f", '/' },
   { "%3d", '=' },
   { "%2c", ',' },
   { 0    , 0   }
};

// When non-zero, add_url_to_list() calls do_check_second() on every URL to
// look for a second "http" embedded in it. Toggled by the -2 option.
int   check_second = 1;
// When non-zero, add_url_to_list() also rejects an entry whose url and prot
// both match an existing entry (case-insensitive). Nothing in this file sets it.
int cmp_only_url = 0;   // do NOT keep copies

char tmpbuff[1024];     // show_url(): output line built in show-all mode
char tmpurl[1024];      // show_url(): untouched copy of the current URL
SYSTEMTIME  st;         // _tmain(): current time, later reused for the summary dates
FILETIME ftEarliest;    // earliest last-visit time seen; _tmain() seeds it with "now"
FILETIME ftLatest = {0};   // latest last-visit time seen; starts at zero

#define  MAX_URL  (MAX_PATH * 2)    // size of URLS::url (URLS::fullurl is twice this)
#define  MAX_PROT 16                // size of URLS::prot

// One node of the singly linked list of URLs collected in clean (-c) mode.
// The same structure is also used as a scratch buffer by do_subs_in_buf(),
// split_url() and do_check_second().
typedef struct tagURLS {
   void * next;      // pointer to NEXT node (stored as void*, cast back to PURLS by the list walkers)
   FILETIME ftLastVisited; // last-visited time copied from the history entry
   int   index;      // date rank assigned by show_url_list() when sorting; 0 = not yet indexed
   char prot[MAX_PROT];       // text in front of the first '/', e.g. "http:"
   char url[MAX_URL];         // text after the leading slashes (host, or host + path up to '?')
   char fullurl[MAX_URL * 2]; // the complete URL; used for duplicate detection
}URLS, * PURLS;

// Head of the URL list; add_url_to_list() appends, show_url_list() prints and frees.
PURLS purls = NULL;

// forward references
// (add_url_to_list and do_check_second call each other)
void add_url_to_list( char * url, char * prot, FILETIME ftLast, char *pfull );


// Print the build stamp and the list of command line options, then terminate
// the program with exit code 2. Never returns.
// The -2 switch is accepted by process_commands() and is therefore listed too.
void give_help( void )
{
   printf("urlhistory: %s at %s\n", __DATE__, __TIME__ );
   printf("Options: Each can be followed by + or -, for on (default), off ...\n" );
   printf(" -2        = Also list a second URL embedded in a URL (on unless -2- given).\n");
   printf(" -a        = Show all URL, in full ...\n");
   printf(" -c        = Clean URL to domain name ...\n");
   printf(" -f        = Add local files to history output.\n");
   printf(" -j        = Add javascript to the listing.\n" );
   printf(" -m        = Add ms-help to the listing.\n" );
   printf(" -n        = Add numbering to output.\n");
   printf(" -o        = Output to tempfil.txt.\n");
   printf(" -s        = Sort by date.\n");
   printf(" -w        = Remove www, if present.\n" );
   exit(2);
}

// Report an unrecognised command line argument and fall through to the help
// text. give_help() ends the program, so this does not return.
//   parg - the offending argument (wide string)
void bad_argument( _TCHAR * parg )
{
   USES_CONVERSION;              // required by the W2A conversion macro
   char * narrow = W2A(parg);    // printf below expects a narrow string

   printf("ERROR: Unknown argument [%s]!\n", narrow );
   give_help();
}

// Decode the optional on/off suffix of a switch.
//   pinarg - points at the option letter; the character after it is examined
// Returns 1 when there is no suffix or the suffix is '+', 0 when it is '-'.
// Any other suffix is reported through bad_argument().
int chk_arg( _TCHAR * pinarg )
{
   switch( pinarg[1] )
   {
   case 0:        // bare option means "on"
   case L'+':
      return 1;
   case L'-':
      return 0;
   default:
      bad_argument( pinarg );
      return 1;
   }
}

void  process_commands( int argc, _TCHAR* argv[] )
{
   int i;
   for( i = 1; i < argc; i++ )
   {
      _TCHAR * arg = argv[i];
      if( arg )
      {
         _TCHAR c = *arg;
         if( ( c == L'-' ) || ( c == L'/' ) )
         {
            arg++;
            if( *arg == L'n' )
               add_number = chk_arg(arg);
            else if( *arg == L'o' )
               out_file = chk_arg(arg);
            else if( *arg == L'a' )
               show_all = chk_arg(arg);
            else if( *arg == L'f' )
               add_files = chk_arg(arg);
            else if( *arg == L'c' )
               clean_url = chk_arg(arg);
            else if( *arg == L'j' )
               add_javascript = chk_arg(arg);
            else if( *arg == L'm' )
               add_ms_help = chk_arg(arg);
            else if( *arg == L's' )
               sort_by_date = chk_arg(arg);
            else if( *arg == L'w' )
               remove_www = chk_arg(arg);
            else if( *arg == L'2' )
               check_second = chk_arg(arg);
            else
               bad_argument(arg);
         } else {
            bad_argument(arg);
         }
      }
   }
}

// Case-insensitive substring search.
//   src  - string to search
//   find - text to look for
// Returns a pointer to the first match inside src, or NULL when there is no
// match or when find is empty.
char * Instr( char * src, char * find )
{
   size_t flen = strlen(find);

   if( flen == 0 )
      return NULL;

   for( char * scan = src; *scan; scan++ )
   {
      if( _strnicmp(scan, find, flen) == 0 )
         return scan;
   }
   return NULL;
}

// Character class tests for ASCII hex digits. Arguments and results are fully
// parenthesized so the macros can be combined freely.
#define  ISNUM(a) (( (a) >= '0' )&&( (a) <= '9' ))
#define  ISHLOW(a) (( (a) >= 'a' )&&( (a) <= 'f' ))
#define  ISHUP(a) (( (a) >= 'A' )&&( (a) <= 'F' ))

// The outer parentheses matter: without them ISHEX(x) && ISHEX(y) groups as
// ISNUM(x) || ISHLOW(x) || (ISHUP(x) && ISNUM(y)) || ..., which accepts a
// non-hex y and evaluates y even when x is not a hex digit.
#define  ISHEX(a) ( ISNUM(a) || ISHLOW(a) || ISHUP(a) )

int got_sub( char * psub, char * pc )
{
   int iret = 0;
   PUSTG2CHR pus = &ustg2chr[0];

   while( pus->stg )
   {
      if( stricmp(psub,pus->stg) == 0 ) {
         *pc = pus->chr;
         iret = 1;
         break;
      }
      pus++;
   }
   return iret;
}

// Replace the %xx escapes known to got_sub() by their characters, in place.
//   pinfull - NUL terminated string; overwritten with the decoded text
// The string is rebuilt in a static scratch buffer and copied back. When at
// least one escape was replaced the function calls itself again, so escapes
// that only appear after a first pass (e.g. "%253D" -> "%3D" -> "=") are
// decoded too. Escapes not in the table are copied through unchanged; debug
// builds print a warning for them.
// Returns: always 0 - iret is cleared before returning (and stays 0 when the
// string holds no '%'), so callers cannot tell whether anything was changed.
// NOTE(review): nothing here checks the scratch buffer size; the output is
// never longer than the input, so this relies on the input fitting into
// URLS::fullurl - confirm for every caller.
int do_subs_in_buf( char * pinfull )
{
   static URLS _s_sUrl2;   // scratch: fullurl = rebuilt string, url = escape being looked up
   int   iret = 0;
   PURLS psUrl = &_s_sUrl2;
   char * pfull = pinfull;
   char * pout = psUrl->fullurl;
   char * purl = psUrl->url;
   char * p;      // current '%' in the input
   char * cp;     // start of the input text not yet copied
   int   cnt;     // number of characters written to pout
   char c;

   p = strchr(pfull, '%');
   if(p) {
      cp = pfull;
      cnt = 0;
      while(p) {
         // copy the plain text in front of this '%'
         while(cp < p) {
            pout[cnt++] = *cp++;
         }
         pout[cnt] = 0;
         if( ISHEX(p[1]) && ISHEX(p[2]))
         {
            // isolate the 3 character escape for the table lookup
            purl[0] = p[0];
            purl[1] = p[1];
            purl[2] = p[2];
            purl[3] = 0;
            if( got_sub( purl, &c ) )
            {
               pout[cnt++] = c;
               p++;  // use up these 3
               p++;
               p++;
               iret++;  // signal a SUB done
            } else {
#ifndef NDEBUG
               // debug aid only: compute the value of the two hex digits for the warning
               int num = 0;
               char u;
               if( ISNUM(p[1]) )
                  num = p[1] - '0';
               else
               {
                  u = toupper(p[1]);
                  num = (u - 'A' + 10);
               }
               num *= 16;
               if( ISNUM(p[2]) )
                  num += p[2] - '0';
               else
               {
                  u = toupper(p[2]);
                  num += (u - 'A' + 10);
               }
               printf( "\nWARNING: NO SUB FOUND FOR [%s] CHECK ME! (num %d = char [%c] = hex [%x])\n", p, num, num, num );
#endif
               // unknown escape: keep the '%' itself, the digits follow as plain text
               pout[cnt++] = *p++;
            }
         } else {
            // '%' not followed by two hex digits: copy it literally
            pout[cnt++] = *p++;
         }
         cp = p;
         p = strchr(p,'%');
      }
      // copy whatever follows the last '%'
      while(*cp) {
         pout[cnt++] = *cp++;
      }
      pout[cnt] = 0;
      strcpy(pinfull,pout);
      if(iret) {
         // something was decoded: run again in case new escapes appeared
         // (the nested call returns 0, so this loop body never executes)
         while( do_subs_in_buf(pinfull) )
           cnt++;
      }
      iret = 0;
   }
   return iret;
}

// Decode and split psUrl->fullurl.
//   psUrl - fullurl must hold the URL on entry. It is decoded in place by
//           do_subs_in_buf(); prot receives the text in front of the first
//           '/', and url the text after the run of slashes, up to (not
//           including) the first '?'.
// Returns 1 when a '/' was found and prot/url were filled, 0 otherwise
// (empty URL or no '/'), in which case prot and url are left untouched.
// Both pieces are truncated to their buffer size. The original copied
// without limit and could run past prot[MAX_PROT] and url[MAX_URL], the
// latter spilling into fullurl, the very string being read.
int split_url( PURLS psUrl )
{
#ifndef NDEBUG
   static URLS _s_sUrl3;   // debug only: snapshot of the URL before decoding
   PURLS psUrl3 = &_s_sUrl3;
#endif
   char * pfull = psUrl->fullurl;
   char * pprot = psUrl->prot;
   char * purl  = psUrl->url;
   char * slash;
   char * host;
   char * query;
   size_t len;

   if( *pfull == 0 )
      return 0;

#ifndef NDEBUG
   strcpy(psUrl3->fullurl,pfull);
#endif
   do_subs_in_buf(pfull);
#ifndef NDEBUG
   // strlen() returns size_t; cast to match the %d conversions
   if( strcmp(psUrl3->fullurl,pfull) )
      printf("Check sub [%s]%d\n became   [%s]%d!\n", psUrl3->fullurl,
         (int)strlen(psUrl3->fullurl), pfull, (int)strlen(pfull));
#endif

   slash = strchr(pfull, '/');
   if( !slash )
      return 0;

   // protocol: everything in front of the first '/'
   len = (size_t)(slash - pfull);
   if( len > MAX_PROT - 1 )
      len = MAX_PROT - 1;
   memcpy(pprot, pfull, len);
   pprot[len] = 0;

   // skip the run of slashes, then take everything up to the query string
   host = slash;
   while(*host == '/')
      host++;
   query = strchr(host,'?');
   len = query ? (size_t)(query - host) : strlen(host);
   if( len > MAX_URL - 1 )
      len = MAX_URL - 1;
   memcpy(purl, host, len);
   purl[len] = 0;

   return 1;
}

// Convenience wrapper: add an already split URLS record to the list by
// passing its individual fields to add_url_to_list().
void add_surl_to_list( PURLS purl )
{
   char * host     = purl->url;
   char * protocol = purl->prot;
   char * complete = purl->fullurl;

   add_url_to_list( host, protocol, purl->ftLastVisited, complete );
}

void do_check_second( char * pfull, FILETIME ftLast )
{
   static URLS _s_sUrl;
   PURLS psUrl = &_s_sUrl;
   char * cp = pfull + 4;
   char * find = "http";
   char * p = Instr(cp,find);
   if( p ) {
      // found another ...
      size_t len = strlen(p);
      strcpy( psUrl->fullurl, p );
      if( split_url( psUrl ) )
      {
         psUrl->ftLastVisited = ftLast;
         add_surl_to_list(psUrl);
      }
   }
}


void add_url_to_list( char * url, char * prot, FILETIME ftLast, char *pfull )
{
   PURLS pu;
   PURLS plast = NULL;

   if( check_second )
      do_check_second( pfull, ftLast );
   pu = purls;
   while(pu)
   {
      void * next = pu->next;
      plast = pu;
      if( strcmp( pfull, pu->fullurl ) == 0 )
         return;
      if( cmp_only_url ) {
         if( ( stricmp(url, pu->url) == 0 ) && (stricmp(prot, pu->prot) == 0) )
            return;  // is already IN list
      }
      pu = (PURLS)next;
   }

   pu = new URLS;
   CHKMEM(pu);
   pu->next = NULL;
   if(strlen(url) < MAX_URL)
      strcpy(pu->url, url);
   else
   {
      strncpy(pu->url, url, MAX_URL-2);
      pu->url[MAX_URL-1] = 0;
   }
   if(strlen(prot) < MAX_PROT)
      strcpy(pu->prot, prot);
   else
   {
      strncpy(pu->prot, prot, MAX_PROT-2);
      pu->prot[MAX_PROT-1] = 0;
   }

   pu->ftLastVisited = ftLast;   // add the FILETIME to the structure
   pu->index = 0; // no date index, yet
   strcpy(pu->fullurl, pfull);

   if(plast)
      plast->next = pu;
   else
      purls = pu;

}

void create_out( void )
{
   outfile = CreateFile( file_name, 
      (GENERIC_READ | GENERIC_WRITE),
      FILE_SHARE_READ,
      NULL,
      CREATE_ALWAYS,
      0,
      NULL);
}

// Write one _TCHAR string plus end_of_line to the output file, creating the
// file on first use.
//   url - NUL terminated text to write
// On a failed or short write the file is closed and 'outfile' is set to
// INVALID_HANDLE_VALUE so later calls become no-ops.
// Fixes relative to the original:
//  - WriteFile takes a byte count, so the character count is multiplied by
//    sizeof(_TCHAR); UNICODE builds used to write only half of each string;
//  - after a failure the function returns at once instead of writing to,
//    and closing, the already invalid handle;
//  - the WriteFile return value is checked, not only the byte count.
// NOTE(review): write2filec() writes narrow text to the same file; mixing
// both in a UNICODE build would interleave encodings.
void write2file( _TCHAR * url )
{
   DWORD wtn, len;

   if( outfile == NULL )
      create_out();
   if( !VFH(outfile) )
      return;

#ifdef UNICODE
   len = (DWORD)(wcslen(url) * sizeof(_TCHAR));
#else
   len = (DWORD)strlen(url);
#endif
   wtn = 0;
   if( !WriteFile( outfile, url, len, &wtn, NULL ) || ( wtn != len ) )
   {
      CloseHandle(outfile);
      outfile = INVALID_HANDLE_VALUE;
      return;
   }

#ifdef UNICODE
   len = (DWORD)(wcslen(end_of_line) * sizeof(_TCHAR));
#else
   len = (DWORD)strlen(end_of_line);
#endif
   wtn = 0;
   if( !WriteFile( outfile, end_of_line, len, &wtn, NULL ) || ( wtn != len ) )
   {
      CloseHandle(outfile);
      outfile = INVALID_HANDLE_VALUE;
   }
}

// Write one narrow string plus "\r\n" (eol) to the output file, creating the
// file on first use.
//   url - NUL terminated text to write
// On a failed or short write the file is closed and 'outfile' is set to
// INVALID_HANDLE_VALUE so later calls become no-ops. The function returns
// right after such a failure; the original went on to write the line ending
// to the invalid handle and closed it a second time.
void write2filec( char * url )
{
   DWORD wtn, len;

   if( outfile == NULL )
      create_out();
   if( !VFH(outfile) )
      return;

   len = (DWORD)strlen(url);
   wtn = 0;
   if( !WriteFile( outfile, url, len, &wtn, NULL ) || ( wtn != len ) )
   {
      CloseHandle(outfile);
      outfile = INVALID_HANDLE_VALUE;
      return;
   }

   len = (DWORD)strlen(eol);
   wtn = 0;
   if( !WriteFile( outfile, eol, len, &wtn, NULL ) || ( wtn != len ) )
   {
      CloseHandle(outfile);
      outfile = INVALID_HANDLE_VALUE;
   }
}

void wso_line( char * txt )
{
   if( hStdOut == NULL )
      hStdOut = GetStdHandle( STD_OUTPUT_HANDLE );   // Standard out
   if(VFH(hStdOut))
   {
      DWORD wtn;
      WriteFile(hStdOut, txt, (DWORD)strlen(txt), &wtn, NULL);
      WriteFile(hStdOut, "\r\n", 2, &wtn, NULL);
   }
}

// Scratch SYSTEMTIME values used while formatting the dates of an entry.
SYSTEMTIME  stLastVisited, stLastUpdated, stExpires;
SYSTEMTIME  stUTC;

// Compare two times at minute resolution.
// Returns TRUE when year, month, day, hour and minute are all equal;
// seconds and milliseconds are ignored.
BOOL AboutSameTime(SYSTEMTIME * p1, SYSTEMTIME * p2)
{
   if( p1->wYear != p2->wYear )
      return FALSE;
   if( p1->wMonth != p2->wMonth )
      return FALSE;
   if( p1->wDay != p2->wDay )
      return FALSE;
   if( p1->wHour != p2->wHour )
      return FALSE;
   if( p1->wMinute != p2->wMinute )
      return FALSE;
   return TRUE;
}

void show_url( STATURL * psurl )
{
	USES_CONVERSION;
   char * purl = tmpurl;
   char * p;
   char * porg;
   _TCHAR * url = psurl->pwcsUrl;
   char * cp = W2A(url);

   url_count++;
   porg = cp;
   strcpy(purl, cp); // copy it into a buffer

   if( show_all )
   {
      char * tmp = tmpbuff;
      char * pt = W2A( psurl->pwcsTitle );
      size_t len;
      BOOL  failed1;

      len = strlen(cp);
      if( len > url_length )
         url_length = len;
      strcpy(tmp,"[");
      strcat(tmp,cp);
      strcat(tmp,"]");
      if(pt) {
         strcat(tmp," title=[");
         strcat(tmp,pt);
         strcat(tmp, "]");
      }
      failed1 = TRUE;
      if( FileTimeToSystemTime( &psurl->ftLastVisited, &stLastVisited ) ) {
         // Build a string showing the date and time.
         sprintf(EndBuf(tmp), " LastVisit=[%02d/%02d/%d %02d:%02d]",
            stLastVisited.wMonth, stLastVisited.wDay, stLastVisited.wYear,
            stLastVisited.wHour, stLastVisited.wMinute);
         failed1 = FALSE;
      }
      if((CompareFileTime(&psurl->ftLastVisited, &psurl->ftLastUpdated) != 0) &&
         ( FileTimeToSystemTime( &psurl->ftLastUpdated, &stLastUpdated ) ) ) {
            if( failed1 || !AboutSameTime(&stLastVisited, &stLastUpdated) ) {
               // Build a string showing the date and time.
               sprintf(EndBuf(tmp), " LastUpdated=[%02d/%02d/%d %02d:%02d]",
                  stLastUpdated.wMonth, stLastUpdated.wDay, stLastUpdated.wYear,
                  stLastUpdated.wHour, stLastUpdated.wMinute);
            }
      }
      if((CompareFileTime(&psurl->ftLastVisited, &psurl->ftExpires) != 0) &&
         ( FileTimeToSystemTime( &psurl->ftExpires, &stUTC ) ) ) {
         // Build a string showing the date and time.
         sprintf(EndBuf(tmp), " Expires=[%02d/%02d/%d %02d:%02d]",
            stUTC.wMonth, stUTC.wDay, stUTC.wYear,
            stUTC.wHour, stUTC.wMinute);
      }
      if( CompareFileTime( &psurl->ftLastVisited, &ftEarliest ) < 0 )
         ftEarliest = psurl->ftLastVisited;
      if( CompareFileTime( &psurl->ftLastVisited, &ftLatest ) > 0 )
         ftLatest = psurl->ftLastVisited;
      len = strlen(tmp);
      if(len > max_line_length)
         max_line_length = len;
      wso_line(tmp);
      if(out_file)
         write2filec( tmp );
      return;
   }

   if( !add_files )
   {
      if( _strnicmp(cp, "file", 4) == 0 )
         return;
   }
   if( !add_javascript )
   {
      if( _strnicmp(cp, "javascript", 10) == 0 )
         return;
   }
   if( !add_ms_help )
   {
      if( _strnicmp(cp, "ms-help", 7) == 0 )
         return;
      if( _strnicmp(cp, "hcp:", 4) == 0 )
         return;
      // ms-its:
      if( _strnicmp(cp, "ms-its:", 7) == 0 )
         return;
      // mk:
      if( _strnicmp(cp, "mk:", 3) == 0 )
         return;
      if( _strnicmp(cp, "res:", 4) == 0 )
         return;
   }

   if( clean_url ) {
      p = strchr(cp, '/');
      if(p)
      {
         while(*p == '/')
            p++;
         cp = p;
         p = strchr(cp,'/');
         if(p)
            *p = 0;
         p = strchr(cp,'?');
         if(p)
            *p = 0;
         if( remove_www )
         {
            if( _strnicmp(cp, "www.", 4) == 0 )
               cp += 4;
         }
      }
      p = strchr(porg, '/');
      if(p)
         *p = 0;
      add_url_to_list( cp, porg, psurl->ftLastVisited, purl );
      return;
   }

   if(out_file)
      write2filec( cp );

//#ifdef UNICODE
//      wprintf(L"%d: \"%s\"\n", cnt, suURL.pwcsUrl);
//#else
   if(add_number)
      printf("%d: \"%s\"\n", url_count, cp);
   else
      printf("%s\n", cp);
      //printf("%d: \"%s\"\n", cnt, W2T(suURL.pwcsUrl));
//#endif
		// hr = pUrlHistoryStg2->DeleteUrl(suURL.pwcsUrl, 0);
}

void show_url_list( void )
{
   int   cnt = 0;
   size_t   max;
   if( clean_url )
   {
      PURLS pu = purls;
      size_t maxu = 0;
      size_t maxp = 0;
      size_t len;
      void * next;
      char * tmpout;
      ULARGE_INTEGER ul1, ul2;
      while(pu)
      {
         next = pu->next;
         len = strlen(pu->url);
         if( len > maxu )
            maxu = len;
         len = strlen(pu->prot);
         if( len > maxp )
            maxp = len;
         pu = (PURLS)next;
      }
      maxu++;
      max = (maxu + maxp + 16);
      if(max < 1024)
         max = 1024;
      tmpout = new char[max];
      CHKMEM(tmpout);
      if( sort_by_date )
      {
         int index = 0;
         PURLS pun = NULL;
         char * tofile = new char[1024];
         CHKMEM(tofile);

         ul1.QuadPart = 0;
         pu = purls;
         pun = pu;
         // SET THE INDEX
         while(pun)
         {
            pun = NULL;
            pu = purls;
            ul1.QuadPart = 0; // set LOWEST time
            while(pu)
            {
               next = pu->next;
               if(!pu->index) {  // if NOT yet indexed
                  ul2.HighPart = pu->ftLastVisited.dwHighDateTime;
                  ul2.LowPart  = pu->ftLastVisited.dwLowDateTime;
                  if(ul2.QuadPart >= ul1.QuadPart) {
                     pun = pu;   // set to this LARGER
                     ul1.QuadPart = ul2.QuadPart;
                  }
               }
               pu = (PURLS)next;
            }
            if(pun)
            {
               index++;
               pun->index = index;
            }
         }
         // DO THE OUTPUT
         printf("Indexed %d items ...\n", index);
         while(index)
         {
            pu = purls;
            while(pu) {
               next = pu->next;
               if(pu->index == index) {
                  strcpy(tmpout, pu->url);
                  while(strlen(tmpout) < maxu)
                     strcat(tmpout," ");
                  strcat(tmpout, pu->prot);
                  if( FileTimeToSystemTime( &pu->ftLastVisited, &stLastVisited ) ) {
                     // Build a string showing the date and time.
                     sprintf(EndBuf(tmpout), " [%02d/%02d/%d %02d:%02d] ",
                        stLastVisited.wMonth, stLastVisited.wDay, stLastVisited.wYear,
                        stLastVisited.wHour, stLastVisited.wMinute);
                  }
                  // OUTPUT THE LIST
                  cnt++;
                  sprintf(tofile, "%d: %s (%s)", cnt, tmpout, pu->fullurl);
                  printf("%s\n", tofile);
                  if(out_file)
                     write2filec( tofile );
                  break;
               }
               pu = (PURLS)next;
            }
            index--; // reduce the INDEX
         }

         // DO THE DELETE
         pu = purls;
         while(pu)
         {
            next = pu->next;
            delete pu;
            pu = (PURLS)next;
         }
         delete tofile;
      }
      else
      {
         pu = purls;
         while(pu)
         {
            next = pu->next;
            strcpy(tmpout, pu->url);
            while(strlen(tmpout) < maxu)
               strcat(tmpout," ");
            // OUTPUT THE LIST
            printf("%s (%s)\n", tmpout, pu->prot);
            delete pu;
            pu = (PURLS)next;
            cnt++;
         }
      }
      sprintf(tmpout, "Listed %d of %d - cleaned URLs ...", cnt, url_count);
      printf("%s\n", tmpout);
      if(out_file)
         write2filec( tmpout );
      delete tmpout;
   }
}


void get_url_list( void )
{
	//USES_CONVERSION;
	CoInitialize(NULL);

	IUrlHistoryStg2* pUrlHistoryStg2 = NULL;
	HRESULT hr = CoCreateInstance(CLSID_CUrlHistory,
		NULL, CLSCTX_INPROC, IID_IUrlHistoryStg2,
		(void**)&pUrlHistoryStg2);

	IEnumSTATURL* pEnumURL;
	hr = pUrlHistoryStg2->EnumUrls(&pEnumURL);

	STATURL suURL;
	ULONG pceltFetched, cnt;
	suURL.cbSize = sizeof(suURL);
	hr = pEnumURL->Reset();

   cnt = 0;
	while((hr = pEnumURL->Next(1, &suURL, &pceltFetched)) == S_OK)
	{
      cnt++;
      show_url(&suURL);
	}

	pEnumURL->Release();
	pUrlHistoryStg2->Release();
	CoUninitialize();

}


int _tmain(int argc, _TCHAR* argv[])
{
   GetSystemTime(&st);
   SystemTimeToFileTime( &st, &ftEarliest );
   process_commands( argc, argv );
   get_url_list();
   show_url_list();
   if( show_all ) {
      printf( "Listed %d items, longest %d (%d)...", url_count, url_length,
         max_line_length );

      if( FileTimeToSystemTime( &ftEarliest, &st ) ) {
         // Build a string showing the date and time.
         printf(" Earliest=[%02d/%02d/%d %02d:%02d]",
            st.wMonth, st.wDay, st.wYear,
            st.wHour, st.wMinute);
      }
      if( FileTimeToSystemTime( &ftLatest, &st ) ) {
         // Build a string showing the date and time.
         printf(" Latest=[%02d/%02d/%d %02d:%02d]",
            st.wMonth, st.wDay, st.wYear,
            st.wHour, st.wMinute);
      }
      printf("\n");
   }
   return 0;
}

// eof - urlhistory.cpp
