|
|
|
|
File: [home] / python / delicious / Delicious.py
(download)
/
(as text)
Revision: 1.20, Fri Sep 14 05:55:02 2007 UTC (2 years, 11 months ago) by ws Branch: MAIN CVS Tags: HEAD Changes since 1.19: +6 -1 lines timezone munging |
#!/usr/bin/python
''' Grabs delicious bookmarks and stores them in a db. I run it as a daily
cron job.
Only the 15 most recent bookmarks are fetched, so if more than 15 have been
added since the last run, the older ones will be missed.
This is modified from
http://dealmeida.net/en/Programming/Python/delicious.html
which uses libxml2, which I don't have installed.
TODO: not all del.icio.us bookmarks are html pages, they can also be pdf
files or graphics. There should be some mime detection to send the right
type of attachment.'''
import datetime, time, urllib2, base64, smtplib
from xml.dom.minidom import * # I don't have libxml2 on my host.
import MySQLdb
try:
from settings import settings
except ImportError:
import sys
print ("Make sure you rename settings-example.py to settings.py and\n"
"change the values before you run %s." % sys.argv[0])
sys.exit(1)
# from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/67083
def createhtmlmail(html, text, subject, fromaddr='py-delicious'):
    """Assemble a multipart/alternative message holding text and HTML bodies.

    The plain-text part is written first and the HTML part second; both are
    quoted-printable encoded and labelled as us-ascii.  The recipient comes
    from settings['mailuser'].  Returns the complete message (headers plus
    body) as a single string."""
    import MimeWriter
    import mimetools
    import cStringIO

    encoding = "quoted-printable"

    message = cStringIO.StringIO()  # receives the finished message
    html_source = cStringIO.StringIO(html)
    text_source = cStringIO.StringIO(text)
    mime = MimeWriter.MimeWriter(message)

    # The subject travels with the other headers because smtplib.sendmail
    # expects it to be part of the message it is handed.
    top_headers = (
        ("From", '"%s"' % fromaddr),
        ("To", settings['mailuser']),
        ("Subject", subject),
        ("MIME-Version", "1.0"),
    )
    for header_name, header_value in top_headers:
        mime.addheader(header_name, header_value)

    # multipart/alternative seems to work better on some MUAs than
    # multipart/mixed.
    mime.startmultipartbody("alternative")
    mime.flushheaders()

    # Each alternative is emitted the same way: open a new part, declare
    # its transfer encoding, then stream the encoded body into the
    # file-like object returned by startbody().
    alternatives = (
        ("text/plain", text_source),
        ("text/html", html_source),
    )
    for content_type, source in alternatives:
        part = mime.nextpart()
        part.addheader("Content-Transfer-Encoding", encoding)
        body = part.startbody(content_type, [("charset", 'us-ascii')])
        mimetools.encode(source, body, encoding)
        source.close()

    # Terminate the multipart section and hand back what was written.
    mime.lastpart()
    result = message.getvalue()
    message.close()
    return result
class Post:
    ''' A single del.icio.us bookmark.

    Instances carry one attribute per name in ATTRIBUTES.  They are normally
    built from a <post> DOM element with Post.CreateFromDOM. '''

    ATTRIBUTES = ('hash', 'time', 'href', 'tag', 'description', 'extended')

    def CreateFromDOM(dom):
        ''' Build a Post from a DOM element.

        Every name in ATTRIBUTES is read from the element with
        getAttribute().  The 'time' attribute, expected in the form
        YYYY-MM-DDTHH:MM:SSZ, is shifted by the local standard-time UTC
        offset and stored as a 'YYYY-MM-DD HH:MM:SS' string.

        Raises ValueError if the time attribute does not match that
        format. '''
        ret = Post()
        for attr in Post.ATTRIBUTES:
            setattr(ret, attr, dom.getAttribute(attr))
        # Use only the six date/time fields.  A wider slice would hand
        # tm_wday to datetime() as its microsecond argument and leave a
        # spurious fractional part in the stored string.
        parsed = time.strptime(ret.time, '%Y-%m-%dT%H:%M:%SZ')
        utc = datetime.datetime(*parsed[:6])
        # Apply the offset in seconds so zones that are not a whole number
        # of hours from UTC are not truncated.
        # NOTE: time.timezone is the non-DST offset; daylight saving time is
        # not taken into account here.
        local = utc - datetime.timedelta(seconds=time.timezone)
        ret.time = str(local)
        return ret
    CreateFromDOM = staticmethod(CreateFromDOM)

    def addToDb(self, cur):
        ''' Write this post to the delicious table using cursor cur.

        Any existing row with the same hash is deleted first, so the row is
        always (re)written.  Returns True if no row with this hash existed
        beforehand and False if an existing row was replaced. '''
        # Relies on cur.execute() returning the number of affected rows, as
        # MySQLdb cursors do.  Parameters are passed as a sequence, as the
        # DB-API expects, even though there is only one.
        replaced = cur.execute('DELETE FROM delicious WHERE hash=%s',
                               (self.hash,))
        # Column names come from the fixed ATTRIBUTES tuple; the values
        # themselves are passed as query parameters.
        sql = ('INSERT INTO delicious (%s) VALUES(%s)'
               % (','.join(Post.ATTRIBUTES),
                  ','.join(['%s'] * len(Post.ATTRIBUTES))))
        # Non-ASCII characters are stored as XML character references.
        values = [getattr(self, s).encode('ascii', 'xmlcharrefreplace')
                  for s in Post.ATTRIBUTES]
        cur.execute(sql, values)
        return not replaced

    def email(self):
        ''' Download the bookmarked page and mail it to settings['mailuser'].

        A failed download is reported on stdout and the post is skipped; no
        mail is sent in that case.  SMTP errors are not caught and propagate
        to the caller. '''
        # First download the page.
        request = urllib2.Request(self.href)
        request.add_header('User-Agent', 'Mozilla/5.0 (compatible; '
                           'http://www.aypwip.org/cvs/index.pl/'
                           'python/delicious/)')
        # Deliberately best-effort: any download problem just skips this
        # bookmark instead of aborting the whole run.
        try:
            response = urllib2.urlopen(request)
            try:
                html = response.read()
            finally:
                response.close()
        except Exception as e:
            print('unable to download %s' % self.href)
            print(e)
            return

        # The tag string is used as the subject and the URL as the sender
        # name.
        msg = createhtmlmail(html, 'delicious bookmark web page', self.tag,
                             self.href)

        server = smtplib.SMTP(settings['smtp'])
        try:
            server.ehlo()
            server.starttls()
            server.ehlo()  # re-identify over the encrypted channel
            user = settings.get('smtpuser', '')
            passwd = settings.get('smtppass', '')
            if user and passwd:
                server.login(user, passwd)
            server.sendmail(settings['mailuser'], settings['mailuser'], msg)
        finally:
            # Always end the SMTP session so the connection is not leaked.
            server.quit()
class Delicious:
    ''' Fetches recent del.icio.us posts and stores/mails the new ones. '''

    def getRecent(self, username, password, tag='', count=15):
        ''' Return a list of Post objects for the most recent bookmarks.

        username, password -- del.icio.us credentials, sent with HTTP Basic
                              authentication.
        tag   -- tag name to filter by; '' applies no filter value.
        count -- number of posts to request (defaults to 15).

        Errors from urllib2 or from parsing the XML response are not caught
        and propagate to the caller. '''
        # Imported here because the module top only pulls in urllib2.
        import urllib

        # urlencode escapes the tag so spaces, '&' and similar characters
        # cannot corrupt the query string.
        query = urllib.urlencode([('count', count), ('tag', tag)])
        request = urllib2.Request('https://api.del.icio.us/v1/posts/recent?'
                                  + query)

        # b64encode never inserts line breaks, so long credentials cannot
        # put a newline in the middle of the Authorization header.
        credentials = base64.b64encode('%s:%s' % (username, password))
        request.add_header('User-Agent', 'tony@ponderer.org; '
                           'http://www.aypwip.org/cvs/index.pl/'
                           'python/delicious/')
        request.add_header('Authorization', 'Basic %s' % credentials)

        response = urllib2.urlopen(request)
        try:
            doc = parseString(response.read())
        finally:
            response.close()
        return [Post.CreateFromDOM(x)
                for x in doc.getElementsByTagName('post')]

    def addToDB(self, posts, cur):
        ''' Store each Post in posts via cursor cur and mail the new ones.

        Posts are handled one at a time, not in a single SQL statement,
        because the per-post result of addToDb() decides whether that post
        is emailed. '''
        for p in posts:
            if p.addToDb(cur):
                # Newly inserted into the db; now email it.
                p.email()
if __name__ == '__main__':
    # Keep the connection itself, not just its cursor, so the work can be
    # committed and the connection closed when the run is over.
    conn = MySQLdb.connect(host=settings['dbhost'],
                           user=settings['dbuser'],
                           passwd=settings['dbpass'],
                           db=settings['dbname'])
    try:
        dbcur = conn.cursor()
        d = Delicious()
        posts = d.getRecent(settings['deluser'], settings['delpass'],
                            tag=settings['tag'])
        d.addToDB(posts, dbcur)
        # MySQLdb leaves autocommit off, so on transactional tables the
        # inserts would be discarded without an explicit commit.
        conn.commit()
    finally:
        conn.close()
| tony at ponderer dot org |
Powered by ViewCVS 0.9.2 |