|
|
|
|
File: [home] / email2rss / email2rss.pl
(download)
/
(as text)
Revision: 1.11, Tue Jan 25 22:00:22 2005 UTC (5 years, 7 months ago) by ws Branch: MAIN CVS Tags: HEAD Changes since 1.10: +1 -1 lines increase wrap size, more patterns |
#!/usr/bin/perl
use lib "/home/ws/local/lib/perl/5.6.1";
use lib "/home/ws/local/share/perl/5.6.1";
use strict;
use Mail::Box::Manager;
use XML::RSS;
use Text::Wrap;
use CGI qw(escapeHTML);
use POSIX;
use IPC::Open2 qw(open2);
use HTML::TreeBuilder;
use Unicode::String qw(latin1 utf8);
# ---- user configuration: adjust the values below for your setup ----
# directory that holds the "patterns" file and html2text.pl
my $basedir = '/home/ws/.procmail';
# Maildir into which the RSS messages get moved
my $maildir = '/home/ws/mail/rss';
# file that receives the generated RSS feed
my $outfile = '/home/ws/public_html/aypwip.org/html/rss/email.xml';
# how many emails the RSS feed should contain
my $NUM_POSTS = 10;
# column at which Text::Wrap re-wraps the message text
$Text::Wrap::columns = 72;
# ---- end of configuration; nothing below should need editing ----
# ---------------------------------------------------------------------
# Main program.
#
#   email2rss.pl rebuild   regenerate the feed from $maildir and exit
#   email2rss.pl           act as a mail filter: copy the message from
#                          stdin to stdout, inserting an
#                          "X-Email2RSS: Yes|No" line at the end of the
#                          header, and regenerate the feed when one of
#                          the patterns matched a header line.
# ---------------------------------------------------------------------
my $cmd = shift;
if (defined $cmd && $cmd eq 'rebuild') {
    writeRSS();
    exit(0);
}

# read email from stdin
my @lines = <>;

# Read the patterns file: one case-insensitive regex per line.  Each
# pattern is compiled once up front.  Blank lines are skipped because an
# empty regex is special-cased by Perl and would not mean "no match".
# Invalid patterns are reported and ignored so that a typo in the file
# cannot abort the filter half-way through a message.
my @patterns;
if (open(my $pattern_fh, '<', "$basedir/patterns")) {
    while (my $pattern = <$pattern_fh>) {
        chomp $pattern;
        next unless $pattern =~ /\S/;
        my $compiled = eval { qr/$pattern/i };
        if (!defined $compiled) {
            warn "email2rss: ignoring invalid pattern '$pattern': $@";
            next;
        }
        push @patterns, $compiled;
    }
    close($pattern_fh);
}
else {
    # Best effort, as before: without patterns nothing is selected.
    warn "email2rss: cannot open $basedir/patterns: $!\n";
}

my $in_header = 1;
my $toRSS     = "No";
foreach my $line (@lines) {
    if ($in_header) {
        # try to match the header (include body later)
        if ($toRSS ne "Yes") {
            foreach my $compiled (@patterns) {
                if ($line =~ $compiled) {
                    $toRSS = "Yes";
                    last;
                }
            }
        }

        # An empty line (LF or CRLF) ends the header; the verdict is
        # inserted just before it so that it becomes the last header.
        if ($line =~ /\A\r?\n?\z/) {
            $in_header = 0;
            print "X-Email2RSS: $toRSS\n";
        }
    }
    print $line;
}

# A message without a blank separator line (header only) still gets its
# verdict header; make sure it starts on a line of its own.
if ($in_header && @lines) {
    print "\n" if $lines[-1] !~ /\n\z/;
    print "X-Email2RSS: $toRSS\n";
}

# if it's RSS, regenerate the RSS feed
if ($toRSS eq "Yes") {
    # The message has already been written to stdout at this point, so a
    # failure while rebuilding the feed is reported but must not turn
    # into a failed filter run.
    my $ok = eval {
        # print the current message first
        my $msg = Mail::Message->read(\@lines);
        writeRSS($msg);
        1;
    };
    warn "email2rss: could not update RSS feed: $@" unless $ok;
}
exit(0);
# Regenerate the RSS feed in $outfile.
#
# Parameters:
#   $msg - optional Mail::Message.  When given, it becomes the first
#          item of the feed, ahead of the messages read from $maildir.
#
# The feed holds at most $NUM_POSTS items in total: the message passed
# in (if any) followed by the messages at the end of the folder, last
# one first.
#
# Dies when the mail folder cannot be opened.  A failure to write the
# output file is reported with warn() and the old file is left alone.
sub writeRSS {
    my $msg = shift @_;

    my $mgr    = Mail::Box::Manager->new;
    my $folder = $mgr->open(folder => $maildir)
        or die "email2rss: cannot open mail folder $maildir\n";

    # Collect the messages to publish.
    my @posts;
    push @posts, $msg if $msg;
    my $available = $folder->messages;    # scalar context: message count
    for (my $i = -1; @posts < $NUM_POSTS && $i >= -$available; $i--) {
        my $stored = $folder->message($i);
        push @posts, $stored if $stored;
    }
    splice(@posts, $NUM_POSTS) if @posts > $NUM_POSTS;

    my $rss = XML::RSS->new(version => '2.0');
    my $builddate = strftime "%a, %e %b %Y %H:%M:%S GMT", gmtime;
    $rss->channel(
        title         => "Tony's email announce feed",
        link          => 'http://www.aypwip.org',
        description   => escapeHTML("<a href='http://www.aypwip.org/cvs/index.pl/email2rss/'>email2rss</a>"),
        language      => 'en-us',
        copyright     => 'this is in the public domain',
        webmaster     => "w\@aypwip.org",
        docs          => 'http://blogs.law.harvard.edu/tech/rss',
        lastBuildDate => $builddate
    );

    foreach my $post (@posts) {
        # A message without any sender header yields an empty author
        # instead of a method call on undef.
        my $sender = $post->sender;
        my $author = $sender ? $sender->format : '';

        $rss->add_item(
            description => escapeHTML(_item_html($post)),
            pubDate     => strftime("%a, %d %b %Y %T GMT", gmtime($post->timestamp)),
            title       => escapeHTML($post->subject),
            author      => escapeHTML(escapeHTML($author)),
            permaLink   => $post->messageId
        );
    }

    # Render before opening the file so that an error while building the
    # XML cannot leave a truncated feed behind.
    my $xml = $rss->as_string() . "\n";

    my $out_fh;
    unless (open($out_fh, '>', $outfile)) {
        warn "email2rss: cannot write $outfile: $!\n";
        return;
    }
    print $out_fh $xml;
    close($out_fh) or warn "email2rss: error closing $outfile: $!\n";
    return;
}

# Build the HTML fragment shown as the description of one feed item:
# a <pre> block with the From/To lines followed by the message text
# (wrapped, HTML-escaped, URLs turned into links).
sub _item_html {
    my $msg = shift;

    my $fromstr = _address_line('From', $msg->sender);
    my $tostr   = join '', map { _address_line('To', $_) } $msg->to;

    my $body     = _text_body($msg);
    my $bodyText = '';
    if (defined $body) {
        $bodyText = $body->decoded . '';    # get message as a string
        $bodyText = _html_to_text($bodyText)
            if $body->type =~ /text\/html/i;    # strip html
    }

    $bodyText = join '', wrap('', '', $bodyText);
    $bodyText = escapeHTML($bodyText);    # replace things like & and "

    # Make links active.  The text is already escaped here, so a URL
    # must stop in front of an escaped <, >, " or ' (as in "<http://x>")
    # instead of swallowing the entity; "&amp;" stays part of the URL.
    # As before, a trailing "." is not taken as part of the link.
    my $entity = qr/&(?:lt|gt|quot|#39);/;
    $bodyText =~ s{((?:https?|ftp)://(?:(?!$entity)\S)+(?!$entity)[^.\s])}{<a href="$1">$1</a>}g;

    return utf8('<pre>' . $fromstr . $tostr . "\n" . $bodyText . '</pre>')->utf8;
}

# Format one address as "<label>: <name> [<user> at <host>]\n".
# Returns the empty string when there is no address object.
sub _address_line {
    my ($label, $address) = @_;
    return '' unless $address;
    return "$label: " . $address->name() . " [" . $address->user() . " at " . $address->host() . "]\n";
}

# Choose the body to display for a message: the body itself when the
# message is not multipart, otherwise the first text/plain part or,
# failing that, the first text/html part.  Nested multiparts are
# searched too.  Returns undef when no text part exists.
sub _text_body {
    my $msg = shift;

    my $whole = $msg->body;
    return $whole unless $whole->isMultipart;

    my %found;
    _collect_text_parts($whole, \%found);
    return defined $found{plain} ? $found{plain} : $found{html};
}

# Walk the parts of a multipart body and remember, in %$found, the first
# text/plain body under "plain" and the first text/html body under "html".
sub _collect_text_parts {
    my ($multipart, $found) = @_;

    foreach my $part ($multipart->parts) {
        my $candidate = $part->body;
        if ($candidate->isMultipart) {
            _collect_text_parts($candidate, $found);
            next;
        }

        my $type = $candidate->type;
        if ($type =~ /text\/plain/i) {
            $found->{plain} = $candidate unless defined $found->{plain};
        }
        elsif ($type =~ /text\/html/i) {
            $found->{html} = $candidate unless defined $found->{html};
        }
    }
    return;
}

# Convert HTML to plain text by piping it through $basedir/html2text.pl.
# NOTE(review): the whole input is written before any output is read;
# this presumably relies on html2text.pl consuming all of its input
# before it produces much output - confirm against that script.
sub _html_to_text {
    my $html = shift;

    my ($rdh, $wrh);
    my $pid = open2($rdh, $wrh, "$basedir/html2text.pl");
    print $wrh $html;
    close($wrh);
    my $text = join '', <$rdh>;
    close($rdh);
    waitpid($pid, 0);    # reap the child; open2 does not do it for us
    return $text;
}
| tony at ponderer dot org |
Powered by ViewCVS 0.9.2 |