(file) Return to email2rss.pl CVS log (file) (dir) Up to [home] / email2rss

File: [home] / email2rss / email2rss.pl (download) / (as text)
Revision: 1.11, Tue Jan 25 22:00:22 2005 UTC (5 years, 7 months ago) by ws
Branch: MAIN
CVS Tags: HEAD
Changes since 1.10: +1 -1 lines
increase wrap size, more patterns

#!/usr/bin/perl

use lib "/home/ws/local/lib/perl/5.6.1";
use lib "/home/ws/local/share/perl/5.6.1";
use strict;
use Mail::Box::Manager;
use XML::RSS;
use Text::Wrap;
use CGI qw(escapeHTML);
use POSIX;
use IPC::Open2 qw(open2);
use HTML::TreeBuilder;
use Unicode::String qw(latin1 utf8);

# set the following variables

# directory holding the "patterns" file (one regular expression per line,
# matched case-insensitively against the header lines of each incoming
# message) and the html2text.pl helper used to convert HTML bodies
my $basedir = "/home/ws/.procmail";

# the Maildir that you want RSS messages moved to; the feed is built from
# the messages found here.  This script only reads the folder -- presumably
# a procmail rule keyed on the X-Email2RSS header does the delivery (confirm
# against your procmailrc).
my $maildir = "/home/ws/mail/rss";

# the RSS output file (overwritten every time the feed is regenerated)
my $outfile = "/home/ws/public_html/aypwip.org/html/rss/email.xml";

# the number of emails you want in your RSS feed; at most this many stored
# messages from $maildir are included
my $NUM_POSTS = 10;

# column at which Text::Wrap re-wraps message bodies before they are
# placed in the feed
$Text::Wrap::columns = 72;

# -- that's it, you shouldn't have to change anything below here --

# Main program.
#
# Usage:
#   email2rss.pl rebuild     regenerate the feed from the mail folder and exit
#   email2rss.pl < message   act as a mail filter: copy the message to stdout
#                            with an "X-Email2RSS: Yes|No" header appended to
#                            the header block, and regenerate the feed when
#                            the message matched one of the patterns
my $cmd = shift;
if (defined $cmd && $cmd eq "rebuild")
{
    # report failure to the caller (cron, shell) instead of always exiting 0
    exit(writeRSS() ? 0 : 1);
}

# read email from stdin
my @lines = <>;

# read patterns from the patterns file, compiling each one once.  A missing
# file or a broken pattern must not abort mail filtering, so problems are
# reported on stderr and the message is simply tagged "No".
my @patterns;
if (open(my $patFh, '<', "$basedir/patterns"))
{
    while (my $p = <$patFh>)
    {
        $p =~ s/\r?\n\z//;

        # an empty pattern would match every header line (or silently reuse
        # the last successful regex), so blank lines are ignored
        next unless ($p =~ /\S/);

        my $re = eval { qr/$p/i };
        if ($re)
        {
            push @patterns, $re;
        }
        else
        {
            warn "email2rss: ignoring invalid pattern '$p': $@";
        }
    }
    close($patFh);
}
else
{
    warn "email2rss: cannot read $basedir/patterns: $!\n";
}

my $header = 1;      # true while we are still inside the header block
my $dropping = 0;    # true while skipping a sender-supplied X-Email2RSS header
my $toRSS = "No";
foreach my $line (@lines)
{
    # are we done with the header?
    if ($header)
    {
        # never trust a tag that arrived with the message: whoever consumes
        # X-Email2RSS must only ever see the one written below
        if ($line =~ /^X-Email2RSS:/i || ($dropping && $line =~ /^[ \t]+\S/))
        {
            $dropping = 1;
            next;
        }
        $dropping = 0;

        # try to match the header (include body later)
        if ($toRSS ne "Yes" && grep { $line =~ $_ } @patterns)
        {
            $toRSS = "Yes";
        }

        # the first blank line (LF or CRLF terminated) ends the header
        if ($line =~ /^\r?\n?\z/)
        {
            $header = 0;
            print "X-Email2RSS: $toRSS\n";
        }
    }
    print $line;
}

# if it's RSS, regenerate the RSS feed
if ($toRSS eq "Yes")
{
    # The tagged message is already on stdout.  Feed generation is
    # best-effort: a failure here is reported but must not turn into a
    # non-zero exit status, which would make the filter as a whole fail.
    my $ok = eval
    {
        # print the current message first
        my $msg = Mail::Message->read(\@lines);
        writeRSS($msg);
        1;
    };
    warn "email2rss: could not update RSS feed: $@" unless ($ok);
}

exit(0);

# Regenerate the RSS 2.0 feed and write it to $outfile.
#
# Arguments:
#   $msg - optional Mail::Message that is not in the folder yet (the message
#          currently being filtered); when given it becomes the first item.
#
# The remaining items are the most recent messages of $maildir, newest
# first.  The feed never holds more than $NUM_POSTS items in total, whether
# or not $msg is supplied.
#
# Returns 1 when the feed file was written, 0 (after a warning on stderr)
# when the mail folder or the output file could not be opened or written.
sub writeRSS
{
    my ($msg) = @_;

    my $mgr = Mail::Box::Manager->new;
    my $folder = $mgr->open(folder => $maildir);
    unless ($folder)
    {
        warn "email2rss: cannot open mail folder $maildir\n";
        return 0;
    }

    # collect the messages to publish: the new one (if any) followed by as
    # many stored ones as still fit, counted back from the end of the folder
    my @feedMsgs = $msg ? ($msg) : ();
    my $stored = $folder->messages;    # scalar context: number of messages
    my $wanted = $NUM_POSTS - scalar(@feedMsgs);
    $wanted = $stored if ($wanted > $stored);
    foreach my $back (1 .. $wanted)
    {
        my $older = $folder->message(-$back);
        push @feedMsgs, $older if ($older);
    }

    my $rss = XML::RSS->new(version => '2.0');
    # %d (not %e) so the day is formatted exactly as in each item's pubDate
    my $builddate = strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime());
    $rss->channel(title       => "Tony's email announce feed",
                  link        => 'http://www.aypwip.org',
                  description => escapeHTML("<a href='http://www.aypwip.org/cvs/index.pl/email2rss/'>email2rss</a>"),
                  language    => 'en-us',
                  copyright   => 'this is in the public domain',
                  webmaster   => "w\@aypwip.org",
                  docs        => 'http://blogs.law.harvard.edu/tech/rss',
                  lastBuildDate => $builddate
                 );

    foreach my $m (@feedMsgs)
    {
        $rss->add_item(_rssItemFor($m));
    }

    my $out;
    unless (open($out, '>', $outfile))
    {
        warn "email2rss: cannot write $outfile: $!\n";
        return 0;
    }
    print $out $rss->as_string() . "\n";
    unless (close($out))
    {
        warn "email2rss: error closing $outfile: $!\n";
        return 0;
    }
    return 1;
}

# Build the XML::RSS add_item() arguments (a key/value list) for one message.
sub _rssItemFor
{
    my ($msg) = @_;

    # a message without From/Sender has no sender object; leave the line out
    my $from = $msg->sender;
    my $fromstr = $from ? _addressLine('From', $from) : '';
    my $tostr = join '', map { _addressLine('To', $_) } $msg->to;

    my $body = _findTextBody($msg->body);
    my $bodyText = $body ? $body->decoded . '' : '';    # message as a string

    if ($body && $body->type =~ /text\/html/i)    # strip html
    {
        $bodyText = _htmlToText($bodyText);
    }
    $bodyText = join '', wrap('', '', $bodyText);
    $bodyText = escapeHTML($bodyText);    # replace things like & and "
    # make links active
    $bodyText =~ s/(((https?)|(ftp))\:\/\/[^ \n]+[^. \n])/<a href="$1">$1<\/a>/g;

    $bodyText = utf8('<pre>' . $fromstr . $tostr . "\n" . $bodyText . '</pre>')->utf8;

    # NOTE(review): permaLink is presumably emitted as a guid flagged as a
    # URL, which a Message-ID is not -- confirm against the XML::RSS docs.
    # Left unchanged so existing subscribers do not see every item as new.
    return (description => escapeHTML($bodyText),
            pubDate     => strftime("%a, %d %b %Y %T GMT", gmtime($msg->timestamp)),
            title       => escapeHTML($msg->subject),
            author      => ($from ? escapeHTML(escapeHTML($from->format)) : ''),
            permaLink   => $msg->messageId
           );
}

# Format one address as a "Label: name [user at host]" line (addresses are
# written with " at " instead of "@").
sub _addressLine
{
    my ($label, $addr) = @_;
    return "$label: " . $addr->name() . " [" . $addr->user() . " at " . $addr->host() . "]\n";
}

# Pick the body to publish.  A single-part body is returned as is.  For a
# multipart body the parts are searched recursively (so a
# multipart/alternative nested inside multipart/mixed is found): text/plain
# is preferred and text/html is used only when nothing was chosen before it.
# Returns undef when no text part exists.
sub _findTextBody
{
    my ($body) = @_;
    return $body unless ($body->isMultipart);

    my $chosen;
    foreach my $part ($body->parts)
    {
        my $candidate = _findTextBody($part->body);
        next unless ($candidate);

        my $type = $candidate->type;
        if ($type =~ /text\/plain/i || ($type =~ /text\/html/i && !$chosen))
        {
            $chosen = $candidate;
        }
    }
    return $chosen;
}

# Convert HTML to plain text by piping it through $basedir/html2text.pl.
# Falls back to returning the HTML unchanged (it is escaped by the caller)
# when the helper cannot be started.
sub _htmlToText
{
    my ($html) = @_;

    # a helper that exits early must not kill us with SIGPIPE
    local $SIG{PIPE} = 'IGNORE';

    my ($rdh, $wrh);
    my $pid = eval { open2($rdh, $wrh, "$basedir/html2text.pl") };
    unless ($pid)
    {
        warn "email2rss: cannot run $basedir/html2text.pl: $@";
        return $html;
    }
    print $wrh $html;
    close($wrh);
    my $text = join '', <$rdh>;
    close($rdh);
    waitpid($pid, 0);    # reap the child so it does not linger as a zombie
    return $text;
}

tony at ponderer dot org
Powered by
ViewCVS 0.9.2