#!/usr/bin/perl

use warnings;
use strict;
use LWP::UserAgent;
use HTML::Form;
use HTTP::Cookies;
use Getopt::Std;
use Getopt::ArgvFile qw(argvFile);




my $delay = 1;        # Delay between HTTP requests. Adjust this if Spamcop.net asks!


#############################################################################
#
# Spamcup - A tool for finishing Spamcop.net reports.
#
#  Copyright (C) Toni Willberg <toniw@iki.fi> http://toniw.iki.fi/
#
#  Get lastest version from:
#        http://toniw.iki.fi/index.cgi/projects/projects-spamcup
#
#############################################################################
#
# * Instructions:
#  Forward you spam to your Spamcop.net address given by Spamcop.net.
#  Wait while Spamcop.net processes the spam.
#
#  Finish reporting by using this script. The script does exactly the same
#  you would do by surfing to http://spamcop.net/ and clicking spam
#  reporting links and buttons. The script uses default selections of
#  checkboxes that Spamcop suggests.
#
#############################################################################
#
#  *** W A R N I N G ! ***
#
#  The script does NOT know where the spam report will be sent so
#   IT'S YOUR RESPONSIBILITY!
#
#  If the script asks spamcop to send reports to wrong places
#   IT'S YOUR FAULT!
#
#  If the script has a bug that causes same report being sent thousand times
#   IT'S YOUR MAIL ADDRESSES!
#
#  DO NOT USE THIS SCRIPT IF YOU DON'T UNDERSTAND WHAT IT DOES!
#   IT'S YOUR SHAME!
#
#############################################################################
#
# Copyright (C) Toni Willberg
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#############################################################################





#############################################################################
#
# No user serviceable parts inside. Consult your local Perl guru if you feel
# like modifying something. Also see the GPL licence!
#
# Remember to submit all modifications to the author also!
#
#############################################################################






#############################################################################
#############################################################################
#############################################################################



my $version = "1.08";




# get optional parameters from configfile .spamcup
argvFile(startupFilename => '.spamcup', home=> "1" );


# get parameters from command line
my %opt;
getopts('vnac:sqdhDul:p:', \%opt);

my $SCident = shift;
my $SCpass = '';

my $footer =  qq(* Spamcup $version  Bug reporting info: http://sourceforge.net/projects/spamcup/
* (C) Toni Willberg <toniw\@iki.fi>  http://toniw.iki.fi/ \n);


if ($opt{v}) {
    print $footer;
    exit;
}

if ($opt{h} || (!$SCident && !$opt{l} && !$opt{c}) || ($SCident && $opt{l}) || ($SCident && $opt{c})) {
        print qq(Usage: $0 [options] <Spamcop-Username>\n
 $0 <options> <Spamcop-Username>

 Options:
  -n Does nothing, just shows if you have unreported spam or not.
  -a Run in a loop untill all spam is reported.
  -s Stupid. Runs without asking confirmation. Use with care.
  -q Be quiet.
  -c Alternate method for signifying code. (Unpaid users WITHOUT username & password)
  -l Alternate method for providing username. (Paid & unpaid users with password)
  -p Method for providing password. (Required for users with password)
  -d Debug mode. Prints all kinds of funny things.
  -D Even more debug mode. Dumps also HTML.
  -v Show version and quit.
  -h You are reading it.

 By default the script confirms every spam its about to report. With option -s it does not
 ask for confirmation.

 You can combine one or more options. You MUST put options before the code.

 See the README file for information about optional configuration file!

$footer
);
        exit;
}

if ($opt{l}) {
    $SCident=$opt{l};
} elsif ($opt{c}) {
    $SCident=$opt{c};
}

if ($SCident =~ /\@/ ) {
    if (!$opt{p} || $opt{p} eq '') {
        print "Enter password: ";
        $SCpass = <STDIN>;
        chomp($SCpass);
    } else {
        $SCpass = $opt{p};
    }
} else {
    undef $SCpass;
}

# DEBUG++: option -D dumps all HTML, used with in development only
# force d if D
if ($opt{D}) {
    $opt{d} = 1;
}


if ($opt{n}) {
    print "* Running with -n. Not actually reporting spam, just showing what is about to happen.\n";
}

if (!$opt{q}) {
    print "* Using ID '$SCident'.\n";

    if ($opt{a}) {
        print "* Running with -a. Runs in a loop while all spam is reported.\n";
    }

    if ($opt{d}) {
        print "* Running with -d. Debug mode. Prints all kinds of funny things.\n";
    }
    if ($opt{D}) {
        print "* Running with -D. Even more debug mode. Dumps also HTML.\n";
    }


    if ($opt{s}) {
        print "* Running with -s, which is stupid. I won't ask confirmation for spam reports. Hope you know what you are doing...\n";
        sleep $delay;
    }

}

# create http client
my $ua = LWP::UserAgent->new;
$ua->agent("spamcup/$version");
$ua->cookie_jar(HTTP::Cookies->new());

my $req;
my $res;

my $lastseenspamid; # to avoid infinite loop


# BEGIN OF THE MAIN LOOP
sub mainloop {


    #############################################################################
    #
    # Get first page that contains link to next one...
    #

    if ($opt{d}) { # debug
        if ($SCpass) {
            print "D: GET http://$SCident:******\@members.spamcop.net/\n";
        } else {
            print "D: GET http://www.spamcop.net/?code=$SCident\n";
        }
    }



    if ($opt{d}) {
        print "D: Sleeping for $delay seconds.\n";
    }
    sleep $delay;

    if ($SCpass) {
        $req = HTTP::Request->new(GET => 'http://members.spamcop.net/');
        $req->authorization_basic($SCident, $SCpass);
    } else {
        $req = HTTP::Request->new(GET => 'http://www.spamcop.net/?code='.$SCident);
    }
#    $ua->cookie_jar->add_cookie_header($req);
    $res = $ua->request($req);

    # verify response
    if ($res->is_success) {
        if ($opt{d}) { # debug
            print "D: Got HTTP response\n";
            # print "D: Headers follow:\n". $res->headers->as_string ."\n\n";
        }
    } else         {
            die "E: Can\'t connect to server or invalid credentials. Re-enter login/password or try again later.\n";
    }


    if ($opt{D}) {
        print "\n--------------------------------------------------------------------------\n";
        print $res->content;
        print "--------------------------------------------------------------------------\n\n";
    }

    #############################################################################
    #
    # Parse id for link
    #
    my $nextid;
    if ($res->content =~ /sc\?id\=(.*?)\"\>/gi) { # this is easy to parse
        # userid ok, new spam available
        $nextid = $1;
    }
    elsif ($res->content =~ /\>No userid found\</i ) {
        # unknown userid
        die "E: No userid found. Please check that you have entered correct code.\n";
    }
    else {
        # userid ok, no new spam
        if (!$opt{q}) {
            print "* No unreported spam found. Quitting.\n";
        }
        return -1; # quit
    }

    if (!$opt{q}) {
        print "* ID of the next spam is '$nextid'.\n";
    }

    # avoid loops
    if ($lastseenspamid && $nextid eq $lastseenspamid) {
        die "E: I have seen this ID earlier. We don't want to report it again. This usually happens because of a bug in Spamcup. Make sure you use latest version! You may also want to go check from Spamcop what's happening: http://www.spamcop.net/sc?id=$nextid\n";
    }

    $lastseenspamid = $nextid; # store for comparison

    undef $req;
    undef $res;


    #############################################################################
    #
    # Fetch the spam report form
    #

    if ($opt{d}) {
        print "D: GET http://www.spamcop.net/sc?id=$nextid\n";
        print "D: Sleeping for $delay seconds.\n";
    }
    sleep $delay;

    $req = HTTP::Request->new(GET => 'http://www.spamcop.net/sc?id='.$nextid);
#    $ua->cookie_jar->add_cookie_header($req);
    $res = $ua->request($req);

    if ($res->is_success) {
        if ($opt{d}) {
            print "D: Got HTTP response\n";
           # print "D: Headers follow:\n". $res->headers->as_string ."\n\n";
        }

    } else         {
            die "E: Can't connect to server. Try again later.\n\n";
    }

    if ($opt{D}) {
        print "\n--------------------------------------------------------------------------\n";
        print $res->content;
        print "--------------------------------------------------------------------------\n\n";
    }



    #############################################################################
    #
    # parse the spam
    #

    my $_cancel = 0;

    my $base_uri = $res->base();
    if (!$base_uri) {
        print "E: No base uri found. Internal error? Please report this.\n";
        exit;
    }

#    $res->content =~ /(\<form action.*?name=\"sendreport\"\>.*?\<\/form\>)/sgi;
    $res->content =~ /(\<form action[^>]+name=\"sendreport\"\>.*?\<\/form\>)/sgi;
    my $formdata = "<html><body>$1</body></html>";
    my $form = HTML::Form->parse($formdata, $base_uri);

    #
    # print the header of the spam
    #

    my $spamhead;
    if ($res->content =~ /Please make sure this email IS spam.*?size=2\>\n(.*?)\<a href\=\"\/sc\?id\=$nextid/sgi ) { # this is also quite easy...
        # this is the normal case

            $spamhead = $1;
            if (!$opt{q} ) {
                print "* Head of the spam follows >>>\n";
                $spamhead =~ s/\n/\t/igs; # prepend a tab to each line
                $spamhead =~ s/<br>/\n/gsi; # simplify a bit
                print "\t$spamhead\n";
                print "<<<\n";
            }



    #############################################################################
    #
    # parse form fields
    #

    # verify form
    if (!$form) {
        if ($opt{d}) {
            print "D: Spamcop returned invalid HTML form. Usually temporary error.\n";
        }
        die "E: Temporary Spamcop.net error. Try again later! Quitting.\n";
    }
    else {
        if ($opt{d}) {
            print "D: Form data follows:\n". $form->dump ."\n\n";
        }

        # how many recepients for reports
        my $max = $form->value("max");


        my $willsend;
        my $wontsend;

        # iterate targets
        for (my $i=1; $i <= $max; $i++) {
            my $send = $form->value("send$i");
            my $type = $form->value("type$i");
            my $master = $form->value("master$i");
            my $info = $form->value("info$i");

            # convert %2E -style stuff back to text, if any
            if ( $info =~ /%([A-Fa-f\d]{2})/g ) {
                $info =~ s/%([A-Fa-f\d]{2})/chr hex $1/eg;
            }

            if ($send and (
                    ($send eq 'on')
                        or
                    ($type =~ /^mole/ and $send == 1 )
                ) ) {
                $willsend .= "\t$master \t($info)\n";
            }
            else {
                $wontsend .= "\t$master \t($info)\n";
            }
        }

        print "Would send the report to the following addresses: (Reason in parenthesis)\n";
        if ($willsend) {
            print $willsend;
        } else {
            print "\t--none--\n";
        }


        print "Following addresses would not be used:\n";
        if ($wontsend) {
            print $wontsend;
        } else {
            print "\t--none--\n";
        }



    }

            # Run without confirming each spam? Stupid. :)
            if (!$opt{s}) {
                print "* Are you sure this is spam? [y/N] ";

                my $reply = <>; # this should be done differently!
                if ($reply && $reply !~ /^y/i) {
                    print "* Cancelled.\n";
                    $_cancel = 1; # mark to be cancelled
                }
                elsif (!$reply) {
                    print "* Accepted.\n";
                }
                else {
                    print "* Accepted.\n";
                }
            }
            else {
                # little delay for automatic processing
                sleep $delay;
            }
            print "...\n";

        }
    elsif ($res->content =~ /Send Spam Report\(S\) Now/gi) {
        # this happens rarely, but I've seen this; spamcop does not show preview headers for some reason
        if (!$opt{s}) {
            print "* Preview headers not available, but you can still report this. Are you sure this is spam? [y/N] ";

            my $reply = <>;
            if ($reply && $reply !~ /^y/i) {
                # not Y
                print "* Cancelled.\n";
                $_cancel = 1; # mark to be cancelled
            }
            else {
                # Y
                print "* Accepted.\n";
                }
        }

    }
    elsif ($res->content =~ /Sorry, this email is too old.*This mail was received on (.*?)\<\/.*\>/gsi) {
            # perhaps it's too old then
            my $ondate = $1;
            if (!$opt{q}) {
                print "W: This spam is too old. You must report spam within 3 days of receipt. This mail was received on $ondate. Deleted.\n";
            }
            return 0;

        }
    elsif ($res->content =~ /click reload if this page does not refresh automatically in \n(\d+) seconds/gs) {
        my $delay = $1;
        print "W: Spamcop seems to be currently overloaded. Trying again in $delay seconds. Wait...\n";
        sleep $delay;
        $lastseenspamid = 0; # fool it to avoid duplicate detector

        return 1; # fake that everything is ok
    }
    elsif ($res->content =~ /No source IP address found, cannot proceed. Not full header/gs) {
        print "W: No source IP address found. Your report might be missing headers. Skipping.\n";
        return 0;
    }

    else {
        # Shit happens. If you know it should be parseable, please report a bug!
        print "W: Can't parse Spamcop.net's HTML. If this does not happen very often you can ignore this warning. Otherwise check if there's new version available. Skipping.\n";
        return 0;
    }


    #############################################################################
    #
    # STOP if -n
    #
    if ($opt{n}) {
        print "* You gave option -n, so we'll stop here. The spam was NOT reported.\n";
        exit;
    }


    if ($opt{d}) {
        print "\n\nD: Starting the parse phase...\n";
    }

    undef $req;
    undef $res;


    #############################################################################
    #
    # Submit the form to Spamcop OR cancel report
    #

    if (!$_cancel) { # SUBMIT spam

        if ($opt{d}) {
            print "D: Submitting form. We will use the default recipients.\n";
            print "D: GET http://www.spamcop.net/sc?id=$nextid\n";
            print "D: Sleeping for $delay seconds. We don't want to jam Spamcop.\n";
        }
        sleep $delay;
        $res = LWP::UserAgent->new->request( $form->click() ); # click default button, submit
    }
    else { # CANCEL SPAM
        if ($opt{d}) {
            print "D: About to cancel report.\n";
        }
        $res = LWP::UserAgent->new->request( $form->click('cancel') ); # click cancel button
    }

    # Check the outcome of the response
    if ($res->is_success) {
        if ($opt{d}) {
            print "D: Got HTTP response\n";
            print "D: -- content follows -------------------------\n";
            print $res->content;
            print "D: -- content ended   -------------------------\n\n";
        }


    } else         {
        die "E: Can't connect to server. Try again later. Quitting.\n";
    }


    if ($_cancel) {
        return 1; # user decided this mail is not spam
    }

    # parse respond
    my $report;
    if ($res->content =~ /(Spam report id .*?)\<p\>/gsi) {
        $report = $1 || "-none-\n";
        $report =~ s/\<br\>//gi;
    }
    elsif ( $res->content =~ /report for mole\@devnull.spamcop.net/ ) {
        $report = 'Mole report(s)';
    }
    else {
        print "W: Spamcop.net returned unexpected content. If this does not happen very often you can ignore this. Otherwise check if there new version available. Continuing.\n";
    }



    #############################################################################
    #
    # print the report
    #

    if (!$opt{q}) {
        print "Spamcop.net sent following spam reports:\n";
        print "$report\n" if $report;
        print "* Finished processing.\n";
    }

    return 1;

# END OF THE LOOP
}



# let's run the beast

my $retval;

if ($opt{a} && !$opt{n}) {

    # run while there is more spam
    while (1) { # Ugly, but does the thing...
        $retval = &mainloop ();
        last if ($retval == -1); # no more spam


        if (!$opt{q}) {

            if (!$retval) {
                print "W: Error occured while processing spam. Continuing.\n";
            }
            print "\n-------------------------------------------------------\n* Processing next spam.\n";
        }
    }
}
else {
    # run once
    &mainloop();
}

if (!$opt{q}) {
    print "* Spamcup $version  (C) Toni Willberg <toniw\@iki.fi>  http://toniw.iki.fi/\n";
}



#############################################################################
# Do not enter beyond this line
