#!/usr/bin/perl
#
# $Id: spam-migrator,v 1.1 2004/08/31 19:16:28 rslomkow Exp $
#
# Copyright Robin * Slomkowski, 2004
# spam-migrator@s.rslomkow.org
#
# This software is available under the GPL
# http://www.gnu.org/licenses/gpl.txt
#
# The basic idea behind spam-migrator is to do statistical spam
# categorization on the server side.  This is useful for people who use
# lots of different clients: web mail when traveling, Mozilla Mail
# on Linux, Outlook at work, and mutt from time to time.  You want
# spam sorted, but not on the client.
#
# You can do this with bogofilter (http://bogofilter.sourceforge.net/),
# but you still have to train and correct it.  This tool lets
# you use bogofilter (in theory some other program as well)
# in conjunction with an IMAP server that uses the Maildir
# (http://cr.yp.to/proto/maildir.html) storage mechanism (such as
# qmail or Courier IMAP).  You just train it by moving messages
# between folders in whatever IMAP client you like to use.
#
# You run the script from cron (say once per hour):
#         # min   hour    day     month   dayOfWeek Command
#         17      *       *       *       *         /home/username/bin/spam-migrator
#
# and it moves the email between mailboxes (pretty safely), and sends
# it through your training program.
#
# You can customize the program per user with
# $HOME/.spam-migrator-rc, where you can change any of the %config
# variables using a simple KEY = value syntax.
#
# Just make sure you subscribe to those folders so they show up in
# your client.
#

use strict ;

# Diagnostic verbosity.  Warnings guarded by "$DEBUG > 0" or "$DEBUG > 1"
# elsewhere in this file stay silent at the default of 0.
my $DEBUG = 0 ;

# File name of the per-user configuration file.
my $RC_FILE = '.spam-migrator-rc' ;

# Default settings.  The folder paths below are built from the default
# DIR right here, at assignment time.
my %config ;

# Base directory in Maildir format
$config{'DIR'} = $ENV{'HOME'} . '/Maildir' ;

# Folder holding messages that are to be marked as spam
$config{'SPAM_DIR'} = $config{'DIR'} . '/.markasspam' ;

# Folder holding messages that were incorrectly marked as spam
$config{'HAM_DIR'} = $config{'DIR'} . '/.markasnotspam' ;

# Folder where messages that are spam should live
$config{'BAD_DIR'} = $config{'DIR'} . '/.spam' ;

# Folder where messages a person wishes to keep should go
$config{'GOOD_DIR'} = $config{'DIR'} ;

# Path to Bogofilter
$config{'BOGOFILTER'} = '/usr/bin/bogofilter' ;

# Flags to remove words from the non-spam list and add them to the spam list
$config{'SPAM_FLAGS'} = '-Ns' ;

# Flags to remove words from the spam list and add them to the non-spam list
$config{'HAM_FLAGS'} = '-Sn' ;

MAIN: {
	# Apply any per-user overrides before the mail folders are touched.
	read_config ( "$ENV{'HOME'}/${RC_FILE}" ) ;

	# Each pass names the %config keys for the folder to drain, the folder
	# that receives the messages, and the bogofilter flags to train with.
	# Messages to be marked as spam are handled first, then the messages
	# that were wrongly marked as spam.
	my @passes = (
		[ 'SPAM_DIR', 'BAD_DIR',  'SPAM_FLAGS' ],
		[ 'HAM_DIR',  'GOOD_DIR', 'HAM_FLAGS'  ],
	) ;

	foreach my $pass (@passes) {
		my ( $from, $to, $flags ) = @config{ @$pass } ;
		mark_spam ( $from, $to, $flags, get_mail ($from) ) ;
	}

	exit 0 ;
}

#
# Functions
#

sub move {
# INPUT: source file, destination file
# OUTPUT: 1 on success, 0 on failure
#
# Moves a file by hard-linking it to the destination name and, if that
# succeeds, unlinking the source name.  An existing destination is never
# overwritten.  If the unlink fails, the file is left under both names
# and 0 is returned.  Every failure is reported with warn.
#
	my ( $src, $dst ) = @_ ;

	# -e rather than -f: anything already sitting at the destination
	# blocks the move, not just a regular file.  No $! in this message,
	# because no system call has failed at this point and $! would only
	# show some unrelated earlier error.
	if ( -e $dst ) {
		warn "destination file $dst exists!" ;
		return 0 ;
	}

	unless ( link ($src, $dst) ) {
		warn "$!: problem linking $src to $dst" ;
		return 0 ;
	}

	unless ( unlink $src ) {
		warn "$!: problem unlinking source file $src" ;
		return 0 ;
	}

	return 1 ;
}

sub get_mail {
# INPUT: $directory
# OUTPUT: @ARRAY_of_relative_filenames
#
# Lists every entry found in the "cur" and "new" subdirectories of a
# Maildir folder.  Names are returned relative to the folder, i.e. as
# "cur/<name>" or "new/<name>"; only "." and ".." are left out.
# If a subdirectory is missing, create_maildir_dir is called on the
# folder before the listing is attempted.
#
	my ( $maildir ) = @_ ;
	my @files ;

	foreach my $sub_dir ( 'cur', 'new' ) {
		if ( ! -d "$maildir/$sub_dir" ) {
			warn "creating Maildir at $maildir" if $DEBUG > 0 ;
			create_maildir_dir ( $maildir ) ;
		}

		# Lexical handle, and closedir only after a successful opendir;
		# a failed opendir leaves nothing to close.
		my $dh ;
		unless ( opendir $dh, "$maildir/$sub_dir" ) {
			warn ("$!: problem with $maildir/$sub_dir") ;
			next ;
		}

		foreach my $entry ( readdir $dh ) {
			next if $entry eq '.' || $entry eq '..' ;
			push @files, "$sub_dir/$entry" ;
		}
		closedir $dh ;
	}

	return @files ;
}

sub mark_spam {
# INPUT: source directory, destination directory, bogofilter flags, @files
# OUTPUT: always 1
#
# For each file (a name relative to both directories): move it from the
# source to the destination directory, then pipe the moved file into
# $config{'BOGOFILTER'} run with the given flags.  A file that cannot be
# moved is not trained on.  Problems are reported with warn and the loop
# carries on with the next file.
#
	my ( $src_dir, $dst_dir, $flags, @files ) = @_ ;

	foreach my $file (@files) {
		next unless move ("$src_dir/$file", "$dst_dir/$file") ;

		# The open results are tested directly.  Written as
		# 'open H, "..." || warn ...' the "||" binds to the string
		# argument, which is always true, so a failed open is never
		# reported.
		#
		# The message is opened first so that bogofilter is not started
		# when there is nothing to feed it.  Three-argument open keeps
		# characters in the file name from being read as an open mode.
		my $msg ;
		unless ( open $msg, '<', "$dst_dir/$file" ) {
			warn "$!: on $dst_dir/$file" ;
			next ;
		}

		# The command stays a single string so that flags holding
		# several options are still split the way they were before.
		my $bogo ;
		unless ( open $bogo, '|-', "$config{'BOGOFILTER'} $flags" ) {
			warn "$!: opening $config{'BOGOFILTER'}" ;
			close $msg ;
			next ;
		}

		while ( my $line = <$msg> ) {
			print {$bogo} $line ;
		}
		close $msg ;

		# close on a piped handle is false both for a system error and
		# for a non-zero exit status of the program; $! is 0 in the
		# second case.  Only system errors are reported here.
		# NOTE(review): the exit status in $? is not inspected; confirm
		# what bogofilter returns for these flags before acting on it.
		unless ( close ($bogo) ) {
			warn "$!: closing pipe to $config{'BOGOFILTER'}" if $! != 0 ;
		}
	}

	return 1 ;
}

sub read_config {
# INPUT: path of a config file
# OUTPUT: 1 if the file was read, 0 if it is not readable or cannot be opened
#
# Overwrites already defined configuration parameters in %config.
# Keys that %config does not already define are ignored.
# Config file format is
#
# KEY = value
#
# Blank lines and lines whose first non-blank character is "#" are
# skipped.  A "#" later on a line is part of the value.  Whitespace
# around the key and around the value is dropped.
#
	my ( $rc_path ) = @_ ;

	if ( ! -r $rc_path ) {
		warn "cannot read $rc_path" if $DEBUG > 0 ;
		return 0 ;
	}

	# Lexical handle rather than the bareword DATA, which is the name
	# Perl itself uses for the __DATA__/__END__ section of a script.
	my $rc ;
	unless ( open $rc, '<', $rc_path ) {
		warn "$!: cannot open $rc_path" ;
		return 0 ;
	}

	while ( my $line = <$rc> ) {
		next if $line =~ /^\s*$/ || $line =~ /^\s*#/ ;

		# The key may not contain "=", so "KEY=a=b" splits at the first
		# "=" instead of the last.  The lazy value match followed by \s*
		# keeps trailing blanks and a trailing carriage return out of
		# the stored value.
		if ( $line =~ /^\s*([^\s=]+)\s*=\s*(\S.*?)\s*$/ ) {
			my ( $key, $value ) = ( $1, $2 ) ;
			if ( defined ($config{$key}) ) {
				$config{$key} = $value ;
				warn "$key set to $value" if $DEBUG > 1 ;
			}
		}
	}
	close $rc ;

	return 1 ;
}

sub create_maildir_dir {
# INPUT: a directory name
#
# This will set the directory up as a Maildir directory: the directory
# itself is created if it is missing, followed by whichever of the
# "new", "cur", and "tmp" subdirectories are missing beneath it.  Every
# mkdir asks for mode 0700.  Failures are reported with warn; nothing
# useful is returned.
#
# These should be created only under the $config{'DIR'} directive; that
# is not checked here.
#
	my ( $dir ) = @_ ;

	if ( ! -d $dir ) {
		# -e is the existence test; -x only asks about execute
		# permission and misses e.g. a plain file in the way.
		if ( -e $dir ) {
			warn "something other than directory exists at $dir" ;
			return ;
		}
		unless ( mkdir ($dir, 0700) ) {
			warn "$!: problem creating $dir" ;
			return ;
		}
	}

	foreach my $sub_dir ( 'new', 'cur', 'tmp' ) {
		if ( -e "$dir/$sub_dir" ) {
			warn "something exists at $dir/$sub_dir" if $DEBUG > 1 ;
			next ;
		}

		# "or", not "||": in 'mkdir $path, 0700 || warn ...' the "||"
		# binds to 0700, which is always true, so the warning can
		# never fire.
		mkdir ("$dir/$sub_dir", 0700)
			or warn "$!: problem creating $dir/$sub_dir" ;
	}

	return ;
}
