#!/usr/bin/perl
#
# $Id: spam-migrator,v 1.1 2004/08/31 19:16:28 rslomkow Exp $
#
# Copyright Robin * Slomkowski, 2004
# spam-migrator@s.rslomkow.org
#
# This software is available under the GPL
# http://www.gnu.org/licenses/gpl.txt
#
# The basic idea behind spam-migrator is to have server-side
# statistical spam categorization.  This is useful for people that use
# lots of different clients: web mail when traveling, mozilla mail
# on Linux, Outlook at work, and mutt from time to time.  You want
# spam sorted, but not on the client.
#
# You can do this with bogofilter (http://bogofilter.sourceforge.net/),
# but you still have to train and correct it.  This tool lets
# you use bogofilter (in theory some other program as well)
# in conjunction with an imap server that uses the Maildir
# (http://cr.yp.to/proto/maildir.html) storage mechanism (such as
# qmail or courier imap).  You just train it by moving messages
# between folders in whatever imap client you like to use.
#
# You run the script from cron (say once per hour)
#
#   min  hour  day  month  dayOfWeek  Command
#   17   *     *    *      *          /home/username/bin/spam-migrator
#
# and it moves the email between mailboxes (pretty safely), and sends
# it through your training program.
#
# You can customize the program per user using the
# $HOME/.spam-migrator-rc file; you can change any of the %config
# variables with a simple KEY = value syntax.
#
# Just make sure you subscribe to those folders so they show up in
# your client.
#
use strict ;
use warnings ;

my (
    $DEBUG,
    $RC_FILE,
    %config,
) ;

# Defaults
$DEBUG   = 0 ;
$RC_FILE = '.spam-migrator-rc' ;

# NOTE: the directories below are derived from the default DIR right here,
# before the rc file is read.  Overriding only DIR in the rc file therefore
# does not relocate SPAM_DIR, HAM_DIR, BAD_DIR or GOOD_DIR; each one has to
# be overridden explicitly.

# Base directory in Maildir format
$config{'DIR'} = "$ENV{'HOME'}/Maildir" ;

# The directory with messages to be marked as spam
$config{'SPAM_DIR'} = "$config{'DIR'}/.markasspam" ;

# The directory with messages that were incorrectly marked as spam
$config{'HAM_DIR'} = "$config{'DIR'}/.markasnotspam" ;

# The directory where messages that are spam should live
$config{'BAD_DIR'} = "$config{'DIR'}/.spam" ;

# The directory where messages that a person wishes to keep should go
$config{'GOOD_DIR'} = "$config{'DIR'}" ;

# Path to Bogofilter
$config{'BOGOFILTER'} = '/usr/bin/bogofilter' ;

# Flags to remove words from the non-spam list and add those words to spam
$config{'SPAM_FLAGS'} = '-Ns' ;

# Flags to remove words from the spam list and add those words to non-spam
$config{'HAM_FLAGS'} = '-Sn' ;

MAIN: {
    read_config ( "$ENV{'HOME'}/${RC_FILE}" ) ;

    # Messages the user filed as spam: train as spam, park in BAD_DIR
    my @spam_files = get_mail ( $config{'SPAM_DIR'} ) ;
    mark_spam ( $config{'SPAM_DIR'}, $config{'BAD_DIR'},
                $config{'SPAM_FLAGS'}, @spam_files ) ;

    # Messages the user rescued: train as non-spam, return to GOOD_DIR
    my @ham_files = get_mail ( $config{'HAM_DIR'} ) ;
    mark_spam ( $config{'HAM_DIR'}, $config{'GOOD_DIR'},
                $config{'HAM_FLAGS'}, @ham_files ) ;

    exit 0 ;
}

#
# Functions
#

sub move {
    # INPUT:  source file, destination file
    # OUTPUT: 1 on success, 0 on failure
    #
    # This hard-links the file from one place to another and, if
    # successful, removes the original.  An existing destination is
    # never overwritten.
    #
    my ( $src, $dst ) = @_ ;

    # -e misses dangling symlinks, hence the extra -l test.  No $! in this
    # message: nothing has failed yet, so errno would be stale.
    if ( -e $dst || -l $dst ) {
        warn "destination file $dst exists!" ;
        return 0 ;
    }

    unless ( link ( $src, $dst ) ) {
        warn "$!: problem linking $src to $dst" ;
        return 0 ;
    }

    unless ( unlink $src ) {
        warn "$!: problem unlinking source file $src" ;
        return 0 ;
    }

    return 1 ;
}

sub get_mail {
    # INPUT:  $directory
    # OUTPUT: @ARRAY_of_relative_filenames ("cur/..." and "new/...")
    #
    # This finds all the mail files in "cur" and "new" in a Maildir
    # folder, creating the Maildir layout first if it is missing.
    #
    my ( $maildir ) = @_ ;
    my @files ;

    foreach my $sub ( 'cur', 'new' ) {
        my $path = "$maildir/$sub" ;

        if ( ! -d $path ) {
            warn "creating Maildir at $maildir" if $DEBUG > 0 ;
            create_maildir_dir ( $maildir ) ;
        }

        my $dh ;
        unless ( opendir $dh, $path ) {
            warn ( "$!: problem with $path" ) ;
            next ;      # nothing was opened, so nothing to close
        }

        foreach my $entry ( readdir $dh ) {
            next if $entry eq '.' || $entry eq '..' ;
            # only plain files can be hard-linked by move()
            next unless -f "$path/$entry" ;
            push @files, "$sub/$entry" ;
        }

        closedir $dh ;
    }

    return @files ;
}

sub mark_spam {
    # INPUT: source directory, destination directory, bogofilter flags, @files
    # OUTPUT: always 1; problems with individual messages are reported
    #         with warn and the message is skipped
    #
    # This moves each file, then pipes the contents of the moved file to
    # bogofilter with the given flags.
    #
    my ( $src_dir, $dst_dir, $flags, @files ) = @_ ;

    # If the trainer dies early, report it instead of being killed by SIGPIPE
    local $SIG{'PIPE'} = 'IGNORE' ;

    FILE:
    foreach my $file ( @files ) {
        next FILE unless move ( "$src_dir/$file", "$dst_dir/$file" ) ;

        my $msg ;
        unless ( open $msg, '<', "$dst_dir/$file" ) {
            warn "$!: on $dst_dir/$file" ;
            next FILE ;
        }

        # The command is kept as a single string so that a BOGOFILTER
        # setting that carries its own arguments keeps working.
        my $bogo ;
        unless ( open $bogo, '|-', "$config{'BOGOFILTER'} $flags" ) {
            warn "$!: opening $config{'BOGOFILTER'}" ;
            close $msg ;
            next FILE ;
        }

        while ( my $line = <$msg> ) {
            unless ( print {$bogo} $line ) {
                warn "$!: writing $dst_dir/$file to $config{'BOGOFILTER'}" ;
                last ;
            }
        }
        close $msg ;

        unless ( close $bogo ) {
            # For a piped handle, close fails on a system error ($! set) or
            # on a non-zero exit ($! is 0 and $? holds the status).
            # bogofilter uses 0-2 for classification results, so only
            # 3 and above is treated as a failure here.
            my $status = $? >> 8 ;
            if ( $! ) {
                warn "$!: closing pipe to $config{'BOGOFILTER'}" ;
            }
            elsif ( $status >= 3 ) {
                warn "$config{'BOGOFILTER'} exited with status $status"
                    . " on $dst_dir/$file" ;
            }
        }
    }

    return 1 ;
}

sub read_config {
    # INPUT:  path of a config file
    # OUTPUT: 1 if the file was read, 0 if it could not be read
    #
    # It overwrites already defined configuration parameters; keys that
    # are not already present in %config are ignored.
    # Config file format is
    #
    # KEY = value
    #
    # with standard shell script comments
    #
    my ( $rc_path ) = @_ ;

    my $fh ;
    unless ( -r $rc_path && open $fh, '<', $rc_path ) {
        warn "cannot read $rc_path" if $DEBUG > 0 ;
        return 0 ;
    }

    while ( my $line = <$fh> ) {
        # drop the newline, a DOS carriage return and trailing blanks so
        # they do not end up inside a path or flag value
        $line =~ s/\s+$// ;

        next if $line eq '' || $line =~ /^\s*#/ ;

        # the key stops at the first "=", so values may contain "="
        if ( $line =~ /^\s*([^\s=]+)\s*=\s*(\S.*)$/ ) {
            my ( $key, $value ) = ( $1, $2 ) ;
            if ( exists $config{$key} ) {
                $config{$key} = $value ;
                warn "$key set to $value" if $DEBUG > 1 ;
            }
            else {
                warn "ignoring unknown key $key in $rc_path" if $DEBUG > 0 ;
            }
        }
    }

    close $fh ;
    return 1 ;
}

sub create_maildir_dir {
    # INPUT: a directory name
    #
    # This will set up the directory as a Maildir directory with
    # "new", "cur", and "tmp" subdirectories, each created with mode 0700.
    # These should be created only under the $config{'DIR'} directory
    # (this is not enforced here).  Parent directories are not created.
    #
    my ( $dir ) = @_ ;

    if ( ! -d $dir ) {
        if ( -e $dir ) {
            warn "something other than directory exists at $dir" ;
            return ;
        }
        unless ( mkdir $dir, 0700 ) {
            warn "$!: problem creating $dir" ;
            return ;
        }
    }

    foreach my $sub ( 'new', 'cur', 'tmp' ) {
        if ( -e "$dir/$sub" ) {
            warn "something exists at $dir/$sub" if $DEBUG > 1 ;
        }
        else {
            # "or", not "||": "||" would bind to the mode argument and the
            # warning could never fire
            mkdir "$dir/$sub", 0700
                or warn "$!: problem creating $dir/$sub" ;
        }
    }

    return ;
}