#!/usr/bin/perl -w
# $Id: aub_hook,v 1.7 2001/12/04 19:40:10 clindell Exp $
#
# Hook utility for use with aub to determine which images
# to retrieve.  Maintains a database of image names which
# have already been retrieved to cut down on unnecessary
# duplication and SPAM.  Currently requires that BSD DB
# support is available via PERL.
#
# Copyright 1997 Bob Friesenhahn, bfriesen@simple.dallas.tx.us
# Free for all uses public or private provided that the
# author retains rights to the original work.
#
# Usage:
#
# Execute as an aub 'hook' (from .aubconf) collecting new files.
# This makes a good default hook statement (appearing before
# any group entries):
#  hook aub_hook --db dbfile --hook
#
# Execute as an aub 'hook' (from .aubconf) collecting names
# only (to use as a filter later).  A group that contains
# SPAM or postings you don't like can be used to add names to
# the database so they can be excluded later.  This statement
# is ideal as a group specific hook statement (appearing
# after a group entry):
#  hook aub_hook --db dbfile --hook --fail
#
# Add file names provided via standard input to database:
#  aub_hook --db dbfile --add
#
# Delete names provided via standard input from database:
#  aub_hook --db dbfile --delete
#
# List repeats based on list of files supplied from standard
# input:
#  aub_hook --db dbfile --repeat
#
# List contents of database:
#  aub_hook --db dbfile --list
#
# To start things off, you may want to build a database of your
# current files by executing something similar to the following:
#
# find /image/directory  -xdev \( -type d -name '.*' -prune \) \
#  -o \( -type f \! -name '.*' \) -print \
#  | aub_hook --db dbfile --add

use strict;
use DB_File;
use Fcntl;
use Getopt::Long;

# Make STDERR unbuffered, then put the previously selected handle back.
# A bare select(STDERR) would stay in effect for the rest of the run and
# send every later print() that names no filehandle (the --list and
# --repeat output) to STDERR instead of STDOUT.
select( ( select(STDERR), $| = 1 )[0] );

# Command-line state; every variable below is filled in by GetOptions.
my $dbfile;         # --db: path of the database file
my $opt_add;        # --add
my $opt_delete;     # --delete
my $opt_fail;       # --fail
my $opt_hook;       # --hook
my $opt_list;       # --list
my $opt_repeat;     # --repeat
my $opt_verbose;    # --verbose

# Usage text; the trailing empty element supplies the final newline.
my $usage = join( "\n",
    'Usage:',
    ' Act as aub external hook:',
    '  aub_hook --db dbfile --hook [--fail] [--verbose]',
    ' Add names provided via stdin to DB:',
    '  aub_hook --db dbfile --add [--verbose]',
    ' Delete names provided via stdin from DB:',
    '  aub_hook --db dbfile --delete [--verbose]',
    ' List match between names provided via stdin and DB:',
    '  aub_hook --db dbfile --repeat',
    ' List names in database:',
    '  aub_hook --db dbfile --list',
    '',
);

# Print an optional complaint followed by the usage text on STDERR,
# then terminate with exit code 1.
my $usage_error = sub {
    my ($complaint) = @_;
    print( STDERR $complaint ) if defined($complaint);
    print( STDERR $usage );
    exit(1);
};

my $parsed_ok = GetOptions(
    'add!'     => \$opt_add,
    'delete!'  => \$opt_delete,
    'db=s'     => \$dbfile,
    'fail!'    => \$opt_fail,
    'hook!'    => \$opt_hook,
    'list!'    => \$opt_list,
    'repeat!'  => \$opt_repeat,
    'verbose!' => \$opt_verbose,
);

# Unparseable options: usage only, no extra message.
$usage_error->() unless $parsed_ok;

# The database file is mandatory for every action.
$usage_error->("Error: Data base file not specified, use --db option\n")
    unless defined($dbfile);

# At least one action switch has to be present.
my $action_given =
    $opt_hook || $opt_add || $opt_delete || $opt_list || $opt_repeat;
$usage_error->("Error: action not specified\n") unless $action_given;


#
# Build-up regular expression for file extensions we accept.
#
#
# File extensions that we support
#
my @fileExtensions=( 'avs', 'bmp', 'cgm', 'eps', 'gif', 'hdf',
             'jbig', 'jpeg', 'jpg', 'mif', 'miff', 'mpeg', 'mpg',
             'pcl', 'pcx', 'pdf', 'pic', 'png', 'pnm', 'ppm',
             'ps', 'rle', 'tga', 'tif', 'tiff', 'xbm', 'xpm', 'xwd');

#
# Pattern capturing "<word characters>.<extension>" for any supported
# extension.  The trailing \b is required: without it "foo.tiff" is
# captured as "foo.tif" (and "foo.psd" as "foo.ps"), so the name the
# hook records differs from the full basename recorded by --add.
#
my $include = '(\w+\.(?:' . join( '|', @fileExtensions ) . ')\b)';

#
# Build-up regular expression for file names we don't accept
# even though they match the include list: names starting with a dot.
#
my $exclude = '(^\.)';

#
# Select operation
#
# Ordered table of (switch, handler) pairs.  Only the first switch that
# is set gets its handler run, so the order below is the precedence when
# several action switches are given at once.
my @actions = (
    [ $opt_hook,   \&filterDb  ],
    [ $opt_add,    \&addDb     ],
    [ $opt_delete, \&deleteDb  ],
    [ $opt_list,   \&listDb    ],
    [ $opt_repeat, \&repeatsDb ],
);
foreach my $action (@actions) {
    my( $wanted, $handler ) = @{$action};
    next unless $wanted;
    $handler->();
    last;
}
exit(0);

# Take a subject line from stdin and attempt to extract a filename.
# Exits with code zero only when the name matches a supported file
# type, is not excluded, was not yet in the database (it is added as
# a side effect) and --fail was not given.  Every other outcome,
# including a missing subject line, exits with a non-zero code.
# This routine never returns to its caller.
sub filterDb {
    my $newfile = 0;	# set once the name has been newly recorded

    my $subject = <STDIN>;
    if( defined($subject) ) {
        print( STDERR $subject ) if $opt_verbose;
        # chomp rather than chop: a subject line that arrives without a
        # trailing newline must not lose its last character (which would
        # cut "pic.gif" down to "pic.gi" and prevent any match).
        chomp($subject);
        # extract filename from subject line
        my ($fname) = $subject =~ m/$include/io;
        if( ! defined($fname) ) {
            print( STDERR "No matching image:\"$subject\"\n" ) if $opt_verbose;
        } elsif( $fname =~ m/$exclude/io ) {
            print( STDERR "Excluded file \"$fname\"\n" ) if $opt_verbose;
        } else {
            my %files;	# database hash
            my $db = tie( %files, 'DB_File', $dbfile, O_RDWR|O_CREAT, 0644, $DB_HASH)
                or die "Opening $dbfile: $!\n";
            # if not already in database, then add
            if ( ! $files{$fname} ) {
                print( STDERR "Adding: $fname\n" )  if $opt_verbose;
                $files{$fname} = 1;
                $newfile = 1;
            } else {
                print( STDERR "Already in DB: $fname\n" ) if $opt_verbose;
            }
            $db->sync();	# sync database to disk
            undef $db;		# close database
            untie %files;	# remove binding to database
        }
    } else {
        print( STDERR "Subject line not supplied!\n");
    }

    # success only for a new file when we are not always reporting failure
    exit( ( $newfile && ! $opt_fail ) ? 0 : 1 );
}

#
# Load database from list delivered via stdin.  Each input line is a
# path; only its final component is stored.  Names that do not match
# the include pattern, or that match the exclude pattern, are skipped.
# The database file is created if it does not exist yet.
#
sub addDb {
    my %files;
    my $db = tie( %files, 'DB_File', $dbfile, O_RDWR|O_CREAT, 0644, $DB_HASH)
        or die "Opening $dbfile: $!\n";
    while( defined(my $line = <STDIN>) ) {
        # chomp rather than chop: a final line without a newline must
        # keep its last character.
        chomp($line);
        my $base = basename($line);
        next if(
                ( $base !~ m/$include/io ) ||
                ( $base =~ m/$exclude/io )
                );
        if ( ! $files{$base} ) {
            print( STDERR "Adding: $base\n" ) if $opt_verbose;
            $files{$base} = 1;
        } else {
            print( STDERR "Repeat: $line\n" ) if $opt_verbose;
        }
    }
    $db->sync();	# sync database to disk
    undef $db;		# close database
    untie %files;	# remove binding to database
}

#
# Delete database entries specified by list delivered via stdin.
# Each input line is a path; the entry keyed by its final component
# is removed.  Names that are not in the database are ignored.
#
sub deleteDb {
    my %files;
    my $db = tie( %files, 'DB_File', $dbfile, O_RDWR|O_CREAT, 0644, $DB_HASH)
        or die "Opening $dbfile: $!\n";
    while( defined(my $line = <STDIN>) ) {
        # chomp rather than chop: a final line without a newline must
        # keep its last character.
        chomp($line);
        # delete, not undef: undef on a tied element stores a blank
        # value under the key, so the name would stay in the database
        # and keep appearing in key listings.
        delete $files{ basename($line) };
    }
    $db->sync();	# sync database to disk
    undef $db;		# close database
    untie %files;	# remove binding to database
}

#
# List contents of database to stdout, one name per line, sorted.
# The database is opened read-only and must already exist.
#
sub listDb {
    my %files;
    my $db = tie( %files, 'DB_File', $dbfile, O_RDONLY, 0644, $DB_HASH)
        or die "Opening $dbfile: $!\n";
    foreach my $key (sort keys %files) {
        # Use the same truth test as the other operations, so a key
        # whose value has been blanked is not reported as present.
        # STDOUT is named explicitly because the default output handle
        # may have been switched to STDERR at program start.
        print( STDOUT $key, "\n" ) if $files{$key};
    }
    undef $db;		# close database
    untie %files;	# remove binding to database
}


#
# List repeated filenames from list delivered via stdin.  Each input
# line is a path; the line is echoed to stdout when its final
# component is an accepted name that is already in the database.
# The database is opened read-only and must already exist.
#
sub repeatsDb {
    my %files;
    my $db = tie( %files, 'DB_File', $dbfile, O_RDONLY, 0644, $DB_HASH)
        or die "Opening $dbfile: $!\n";
    while( defined(my $line = <STDIN>) ) {
        # chomp rather than chop: a final line without a newline must
        # keep its last character.
        chomp($line);
        my $base = basename($line);
        next if(
                ( $base !~ m/$include/io ) ||
                ( $base =~ m/$exclude/io )
                );
        # STDOUT is named explicitly because the default output handle
        # may have been switched to STDERR at program start.
        print( STDOUT "$line\n" ) if $files{$base};
    }
    undef $db;		# close database
    untie %files;	# remove binding to database
}

#
# Return file name portion of path: everything after the last '/'.
# A path without any '/' is returned unchanged; a path ending in '/'
# yields the empty string.
#
sub basename {
    my($path) = @_;
    my $last_slash = rindex( $path, '/' );
    return( $last_slash < 0 ? $path : substr( $path, $last_slash + 1 ) );
}

