(file) Return to Delicious.py CVS log (file) (dir) Up to [home] / python / delicious

File: [home] / python / delicious / Delicious.py (download) / (as text)
Revision: 1.20, Fri Sep 14 05:55:02 2007 UTC (2 years, 11 months ago) by ws
Branch: MAIN
CVS Tags: HEAD
Changes since 1.19: +6 -1 lines
timezone munging

#!/usr/bin/python

''' Grabs delicious bookmarks and stores them in a db.  I run it as a daily
cron job.

If more than 15 bookmarks have been added since the last run, only the 15
most recent ones are fetched and the older ones are missed.

This is modified from 
  http://dealmeida.net/en/Programming/Python/delicious.html
which uses libxml2, which I don't have installed.

TODO: not all del.icio.us bookmarks are html pages, they can also be pdf
files or graphics.  There should be some mime detection to send the right
type of attachment.'''

import datetime, time, urllib2, base64, smtplib
from xml.dom.minidom import *  # I don't have libxml2 on my host.

import MySQLdb

# settings.py has to be created by the user from settings-example.py; when
# it cannot be imported, print a hint and stop with a non-zero exit status.
try:
  from settings import settings
except ImportError:
  import sys
  hint = ("Make sure you rename settings-example.py to settings.py and\n"
          "change the values before you run %s." % sys.argv[0])
  print (hint)
  sys.exit(1)

# from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/67083
def createhtmlmail(html, text, subject, fromaddr='py-delicious'):
  """Build a multipart/alternative message with a text and an HTML body.

  html and text are the two renderings to include; subject becomes the
  Subject header and fromaddr the (double-quoted) From header.  The To
  header is read from settings['mailuser'].  Both bodies are written
  quoted-printable and labelled us-ascii.

  Returns the whole message, headers included, as a string."""
  import cStringIO
  import mimetools
  import MimeWriter

  buf = cStringIO.StringIO()
  message = MimeWriter.MimeWriter(buf)

  # smtplib.sendmail expects the headers (Subject included) to be part of
  # the message text, so they are written here.
  top_headers = (
    ("From", '"%s"' % fromaddr),
    ("To", settings['mailuser']),
    ("Subject", subject),
    ("MIME-Version", "1.0"),
  )
  for name, value in top_headers:
    message.addheader(name, value)

  # multipart/alternative seems to work better on some MUAs than
  # multipart/mixed.
  message.startmultipartbody("alternative")
  message.flushheaders()

  # Plain text part first, HTML part second; both are encoded the same way.
  for mimetype, content in (("text/plain", text), ("text/html", html)):
    source = cStringIO.StringIO(content)
    part = message.nextpart()
    part.addheader("Content-Transfer-Encoding", "quoted-printable")
    # startbody hands back a file-like object for the part's payload.
    sink = part.startbody(mimetype, [("charset", 'us-ascii')])
    mimetools.encode(source, sink, 'quoted-printable')
    source.close()

  # Terminate the multipart body and hand back everything written so far.
  message.lastpart()
  result = buf.getvalue()
  buf.close()
  return result


class Post:
  ''' A single bookmark taken from a <post> element of the API response.

  Instances carry one attribute per name in ATTRIBUTES.  After
  CreateFromDOM, `time` is a 'YYYY-MM-DD HH:MM:SS' string shifted from UTC
  by the local standard-time offset. '''

  # Attribute names read from the <post> element; the same names are used as
  # column names when inserting into the `delicious` table.
  ATTRIBUTES = ('hash', 'time', 'href', 'tag', 'description', 'extended')

  # Layout of the `time` attribute as delivered by the API (UTC).
  TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

  @staticmethod
  def CreateFromDOM(dom):
    ''' Build a Post from a DOM element.

    dom is any object offering getAttribute(name); every name in ATTRIBUTES
    is copied onto the new Post.

    Raises ValueError (from time.strptime) if the time attribute does not
    match TIME_FORMAT. '''
    ret = Post()
    for attr in Post.ATTRIBUTES:
      setattr(ret, attr, dom.getAttribute(attr))
    # Only the first six struct_time fields (year .. second) belong in the
    # datetime constructor; a seventh would be taken as microseconds and
    # leak the weekday number into the stored timestamp.
    parsed = time.strptime(ret.time, Post.TIME_FORMAT)
    utc = datetime.datetime(*parsed[:6])
    # time.timezone is the offset in seconds west of UTC.  Applying it in
    # seconds keeps zones with half-hour offsets exact instead of truncating
    # them to whole hours.  Daylight saving time is not taken into account.
    local = utc - datetime.timedelta(seconds=time.timezone)
    ret.time = str(local)
    return ret

  def addToDb(self, cur):
    ''' Store this post in the `delicious` table.

    cur is a database cursor.  Any existing row with the same hash is
    deleted first, so the post is always (re-)inserted.

    Returns True if no row with this hash existed before (a new post) and
    False if an existing row was replaced.  This relies on execute()
    returning the number of affected rows, as MySQLdb cursors do. '''
    # Parameters go in as a sequence, as the DB-API requires; a bare string
    # only works by accident with some driver versions.
    sql = 'DELETE FROM delicious WHERE hash=%s'
    isDuplicate = cur.execute(sql, (self.hash,))

    sql = ('INSERT INTO delicious (%s) VALUES(%s)'
           % (','.join(Post.ATTRIBUTES),
              ','.join(['%s'] * len(Post.ATTRIBUTES))))
    # Non-ASCII characters are stored as XML character references.
    values = [getattr(self, s).encode('ascii', 'xmlcharrefreplace')
              for s in Post.ATTRIBUTES]
    cur.execute(sql, values)
    return not isDuplicate

  def email(self):
    ''' Download the bookmarked page and mail it to settings['mailuser'].

    The page becomes the HTML body of the message, the post's tag string
    the subject and its URL the sender name.  If the download fails the
    error is printed and nothing is sent.  SMTP errors are not caught. '''
    # first we download the page
    request = urllib2.Request(self.href)
    request.add_header('User-Agent', 'Mozilla/5.0 (compatible; '
                       'http://www.aypwip.org/cvs/index.pl/'
                       'python/delicious/)')
    # what if this fails?  try again? silently ignore and continue?
    # let's see how much of a problem it is first
    try:
      response = urllib2.urlopen(request)
      try:
        html = response.read()
      finally:
        response.close()
    except Exception as e:
      print('unable to download %s' % self.href)
      print(e)
      return
    msg = createhtmlmail(html, 'delicious bookmark web page', self.tag,
                         self.href)
    server = smtplib.SMTP(settings['smtp'])
    try:
      server.ehlo()
      server.starttls()
      # The session has to be re-identified after switching to TLS.
      server.ehlo()
      user = settings.get('smtpuser', '')
      passwd = settings.get('smtppass', '')
      # Authentication is optional: only log in when both are configured.
      if user and passwd:
        server.login(user, passwd)
      server.sendmail(settings['mailuser'], settings['mailuser'], msg)
    finally:
      # Always end the session so the connection is not left open.
      try:
        server.quit()
      except smtplib.SMTPServerDisconnected:
        # The connection already dropped; do not mask the original error.
        pass

class Delicious:
  ''' Fetches recent posts from the del.icio.us v1 API and feeds them to the
  database and mail steps implemented on Post. '''

  def getRecent(self, username, password, tag='', count=15):
    ''' Returns a list of Post objects.

    username and password are sent as HTTP Basic credentials.  tag is a
    tagname to filter by (empty for no filter) and count is the number of
    posts requested from the API.

    Errors raised by urllib2.urlopen or by the XML parser are not
    caught. '''
    import urllib  # only needed here; the module imports just urllib2

    # Escape the query values so tags containing spaces, '&' or '#' cannot
    # break the URL.  A list of pairs keeps the parameter order fixed.
    query = urllib.urlencode([('count', count), ('tag', tag)])
    request = urllib2.Request('https://api.del.icio.us/v1/posts/recent?'
                              + query)

    # Set up authentication info.  b64encode never inserts line breaks;
    # encodestring wraps long input, which would leave a newline inside the
    # Authorization header.
    base64string = base64.b64encode('%s:%s' % (username, password))
    request.add_header('User-Agent', 'tony@ponderer.org; '
                       'http://www.aypwip.org/cvs/index.pl/'
                       'python/delicious/')
    request.add_header('Authorization', 'Basic %s' % base64string)

    response = urllib2.urlopen(request)
    try:
      doc = parseString(response.read())
    finally:
      response.close()

    return [Post.CreateFromDOM(x) for x in doc.getElementsByTagName('post')]

  def addToDB(self, posts, cur):
    ''' Input is a list of Post objects and a database cursor.

    Every post is written to the database; posts that addToDb reports as
    new are also emailed. '''
    # we could try to post all of them using a single sql statement,
    # however, we want the result associated with each post
    # so we can determine whether or not to email the results
    for p in posts:
      if p.addToDb(cur):
        # successful insertion into db. now email to gmail account.
        p.email()

if __name__ == '__main__':
  # Entry point for the cron job: fetch the recent posts, store them and
  # mail the new ones.
  dbconn = MySQLdb.connect(host = settings['dbhost'],
                           user = settings['dbuser'],
                           passwd = settings['dbpass'],
                           db = settings['dbname'])
  # Nothing in this script calls commit(), so without autocommit the
  # inserts would be rolled back on disconnect for transactional tables.
  # Committing per statement also keeps already-mailed posts recorded if a
  # later post fails.
  dbconn.autocommit(True)
  try:
    dbcur = dbconn.cursor()

    d = Delicious()
    posts = d.getRecent(settings['deluser'], settings['delpass'],
                        tag=settings['tag'])
    d.addToDB(posts, dbcur)
  finally:
    # Release the connection even when fetching, storing or mailing fails.
    dbconn.close()

tony at ponderer dot org
Powered by
ViewCVS 0.9.2