package wlgmod::jpg;

# jpg word extract plugin
#
# You need 'exif' to be able to use this plugin
# Get it at: http://sourceforge.net/projects/libexif
#
# Written by Aaron Peterson [ aaron (@) midnightresearch.com ]

use strict;

# Full path of the 'exif' binary.  Starts out empty; init() fills it in from
# the output of 'which exif'.
my $exif="";

# Set this to '1' if you want to grab all tags instead of only the
# preconfigured ones in @tags.
my $alltags=0;

# This is the list of tags that we look for and dump.  These are all the ones
# that I could find that would have non-programmatic user input (as opposed to
# fields like "PixelXDimension" or "SubSecTimeDigitized").  If you want all
# fields (which could generate a lot of false positives, e.g. numbers),
# then set $alltags to '1'.
my @tags=("Document Name",
          "Image Description",
          "Artist",
          "Maker Note",
          "User Comment",
          "Subject Location",
          "File Source",
          "Scene Type",
          "Software",
          "Scene Capture Type",
          "Device Setting Description",
          "Image Unique ID",
          "Unknown");

# Set to '1' for extra messages.  Declared with 'our' so that it remains the
# package variable ($wlgmod::jpg::debug) it has always been, while also being
# a legal declaration under 'use strict'.
our $debug=0;

# Locate the 'exif' binary with 'which' and remember its path in $exif.
#
# Returns an empty string on success, or an error message when 'exif' cannot
# be found (the 'which' command could not be started, exited with a non-zero
# status, or printed no path).
sub init {
	my $not_found = "Cannot find 'exif' (http://sourceforge.net/projects/libexif)";

	# Forget any previously located path so that a failed lookup never
	# leaves a stale value behind.
	$exif = "";

	# List-form pipe open with a lexical handle: no shell is involved and
	# no global FILE handle is clobbered.
	open(my $which, '-|', 'which', 'exif') or return $not_found;
	my $path = <$which>;

	# Closing a pipe handle waits for the child and stores its exit status
	# in $?.
	close($which);
	return $not_found if ($?);

	# A zero exit status with no usable output is still a failure.
	return $not_found unless (defined($path));
	chomp($path);
	return $not_found if ($path eq "");

	$exif = $path;
	return "";
}


# Run 'exif' on a file and collect candidate words from its tag values.
#
# Arguments:
#   $this     - first argument passed by the caller (not used here)
#   $filename - path of the image to scan
#
# Every output line of the form "tag|value" is considered.  Unless $alltags
# is set, only tags listed in @tags are kept.  Each kept value is split on
# whitespace, the characters , . ; : ? are stripped from every piece, and the
# non-empty results are returned.
#
# Returns the list of words (possibly empty).  Dies if the exif process
# cannot be started.
sub get_words {
	my ($this, $filename) = @_;
	my @words;

	$debug && print "Starting exif scan of file [$filename]\n";

	# $exif is only filled in by a successful init(); with no binary to
	# run there is nothing to extract.
	if (!defined($exif) || $exif eq "") {
		$debug && print "No exif binary located, skipping [$filename]\n";
		return (@words);
	}

	# List-form pipe open: the filename is handed to exif as a single
	# argument and never passes through a shell, so quotes, '$' or
	# backticks in the name cannot break or inject into the command.
	open(my $pipe, '-|', $exif, $filename)
		or die "Cannot open $filename: $!";

	# Build the lookup table once instead of scanning @tags for every line.
	my %wanted = map { $_ => 1 } @tags;

	while (my $line = <$pipe>) {

		# Ignore headers
		next if ($line =~ m/^-/);
		next if ($line =~ m/^Tag\s/);

		# Get the tag and comment.  Lines that do not have the
		# "tag|value" shape are skipped, so the capture variables are
		# only read after a successful match.
		next unless ($line =~ m/^(.*?)[\s]*\|(.*?)[\s]*$/);
		my ($tag, $comment) = ($1, $2);

		# skip it if it's blank.
		next if (($comment eq "") || ($tag eq ""));

		$debug && print "Got tag [$tag] and comment [$comment]\n";

		# ignore tags that we don't care about
		if ($alltags==0) {
			$debug && print "Filtering exif tags\n";
			if (!$wanted{$tag}) {
				$debug && print "Ignoring exif tag [$tag]\n";
				next;
			}
			$debug && print "Using exif tag [$tag]\n";
		} else {
			$debug && print "Using all exif tags\n";
		}

		$debug && print "Adding words for comment [$comment]\n";
		foreach my $word (split(/\s+/, $comment)) {
			# Strip punctuation; a piece made only of punctuation
			# becomes empty and is dropped.
			$word =~ s/[,.;:?]+//g;
			push(@words, $word) if ($word ne "");
		}
	}
	close($pipe);

	return (@words);
}

1;

# vim:ts=2:sw=2:sts=0
