#!/usr/bin/env python
#
#    Copyright (c) 2008 Claudio Bantaloukas <rockdreamer@gmail.com>
#    http://www.rdfm.org/
#
#    Permission is hereby granted, free of charge, to any person obtaining a copy
#    of this software and associated documentation files (the "Software"), to deal
#    in the Software without restriction, including without limitation the rights
#    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#    copies of the Software, and to permit persons to whom the Software is
#    furnished to do so, subject to the following conditions:
#
#    The above copyright notice and this permission notice shall be included in
#    all copies or substantial portions of the Software.
#
#    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#    THE SOFTWARE.
"""
Utility to convert resources exported from a zope site to blog posts or pages on a wordpress blog.
Install python-docutils python-beautifulsoup
"""

import sys
import xmlrpclib
import re
from datetime import *
from xml.dom import minidom
from pprint import pprint

import codecs

class PostatoreException(Exception):
	"""Error raised by Postatore when a resource cannot be processed."""

class Postatore:

	def __init__(self,  thefile,  url,  login, password,  verbose,  alwaysconverttext):
		self.url=url
		self.login=login
		self.password=password
		self.verbose=verbose
		self.alwaysConvertText=alwaysconverttext
		self.service = xmlrpclib.Server(self.url)
		self.inHeader= True
		self.inDescription=False
		self.file = thefile

	def tryHeader(self,  line):
		t=line.split(': ', 1)
		isheader=(len(t)==2)
		if isheader:
			return (isheader, t[0], t[1])
		else:
			return (isheader,  line, None)

	def handleHeader(self, name, attribute):
		if name=='title' or name=='Title':
			self.title = attribute
			if self.verbose:
				print "Got Title: ", self.title.encode('latin-1')  
			return				
		if name=='Description':
			self.inDescription = True
			if self.verbose:
				print "Got Description line, ignoring following lines until next header"  
			return				
		if name=='creation_date' or name=='Effective_date':
			datere = re.compile( r'(\d{4})[/-](\d{,2})[/-](\d{,2}) (\d{,2}):(\d{,2}):(\d{,2})')
			if self.verbose:
				print "Date line:",  attribute, 'exports to'
				pprint(datere.search(attribute).groups())
			year,  month, day, hour, minute,  seconds = datere.search(attribute).groups()
			s=year+ month+ day +u"T" +hour+ u":" +minute+ u":"+  seconds
			self.creation = xmlrpclib.DateTime(s.encode( 'iso-8859-1'))
			if self.verbose:
				print "Got Creation Date: ", self.creation,  " from string ",  attribute
			return
		if name=='Content-Type' or name=='Format':
			self.type = attribute
			if self.verbose:
				print "Got Content-Type: ", self.type
			return

	def parseHtmlFormat(self):
		if self.verbose:
			print "Will parse html format file"
		
		from BeautifulSoup import BeautifulSoup
		it=self.file.read()
		soup = BeautifulSoup(it)
		
		self.title = unicode(soup.html.head.title.string)
		self.txt = unicode(soup.body.renderContents(),  'utf-8')
		for meta in soup.findAll('meta'):
			if meta['name'] and  meta['content']:
				self.handleHeader( meta['name'],  unicode(meta['content']))
		
	def convertHtmlContent(self):
		if self.verbose:
			print "Will parse html content through beautifulsoup"
		
		from BeautifulSoup import BeautifulSoup
		soup = BeautifulSoup(self.txt)

		if not soup.body:
			self.txt = unicode(soup.prettify(),  'utf-8')
		else:
			self.txt = unicode(soup.body.renderContents(),  'utf-8')

	def parseFile(self):
		firstline=self.file.read(6)
		if firstline.startswith("<html>"):
			self.file.seek(0)
			self.parseHtmlFormat()
		else:
			self.file.seek(0)
			self.parseTextFormat()

	def parseTextFormat(self):
		if self.verbose:
			print "Will parse text format file"
		self.txt=''
		
		for line in self.file:
			line = line.rstrip('\n').rstrip('\r')
			(isheader, name, attribute)=self.tryHeader(line)
			
			if isheader:
				if self.inDescription:
					self.inDescription=False
				self.handleHeader(name, attribute)
			
			if self.inDescription:
				continue
			
			if (self.inHeader):
				if line=="":
					self.inHeader = False
					continue
			else:
				self.txt+=line
				self.txt+='\n'
			
		self.handleContent()


	def handleContent(self):
		if self.type=="text/plain":
			if self.alwaysConvertText:
				self.convertRstContent()
			return
		
		if self.type=="text/html":
			self.convertHtmlContent()
			return
			
		if self.type=="text/x-rst" or self.type=="text/structured":
			self.convertRstContent()
			return
		
		raise PostatoreException("Dont know how to handle content-type "+self.type)

	def convertRstContent(self):
		from docutils import core, io
		
		if self.verbose:
			print "Will convert ReStructuredText to html before uploading"
			
		overrides= {
		'input_encoding':'unicode', 
		'doctitle_xform':1, 
		'initial_header_level':1
		}
		parts= core.publish_parts(
		                          source=self.txt,  source_path=None, destination_path=None, 
								  writer_name='html',  settings_overrides=overrides)
		self.txt = parts['html_body']

	def getPages(self):
		pprint(self.service.wp.getPages("", self.login, self.password))
	
	def getMethods(self):
		pprint(self.service.system.listMethods())
	
	def addPage(self,  publish):
		post = {}
		post['title']=self.title
		post['description']=self.txt
		post['dateCreated']=self.creation
		
		if self.verbose:
			print "Will try to add a new page"
			pprint(post)
			
		self.service.wp.newPage('', self.login, self.password, post, publish)

	def addPost(self,  tags,  categories,  publish):
		post = {}
		post['title']=self.title
		post['description']=self.txt
		post['dateCreated']=self.creation
		if tags:
			post['mt_keywords']=tags.split(',')
		if categories:
			post['categories']=categories.split(',')
		
		if self.verbose:
			print "Will try to add a new post"
			pprint(post)
		
		self.service.metaWeblog.newPost('0',  self.login, self.password, post, publish)

def main():
	"""Command line entry point: parse the options, read the resource and upload it.

	The resource is read from the file named by the first argument, or from
	standard input when no argument is given; both are decoded as utf-8.
	Exits through the option parser when the url, username or password is
	missing, and with the error message when the resource cannot be processed.
	"""
	from optparse import OptionParser
	parser = OptionParser(
	                      usage= 'usage: %prog [OPTION] FILE', 
	                      description="Extract resources from a zope ftp resource file and post it on a Wordpress blog. The post can be either a wordpress page or a normal post and the original file can be in html or rst format. Categories can be set via a comma separated list. Invalid categories are ignored. The title, date and content are obtained from the zope exported file.")
	parser.set_defaults(do_page=False)
	parser.set_defaults(do_post=True)
	parser.set_defaults(publish=False)
	parser.set_defaults(alwaysrest=False)
	parser.add_option('-a',  '--page',  action="store_true",  dest="do_page",
	                  help="Turn the zope resource into a Wordpress Page")
	parser.add_option('-o',  '--post',  action="store_true",  dest="do_post",
	                  help="Turn the zope resource into a Wordpress Post (default action)")
	parser.add_option('-t',  '--tags',  dest="tags",
	                  help="Add comma separated tags to the post", metavar="TAGS")
	parser.add_option('-c',  '--categories',  dest="categories",
	                  help="Add the post to a comma separated list of categories", metavar="CATEGORIES")
	parser.add_option('-i',  '--immediately',  action='store_true',  dest="publish",
	                  help="Publish the resulting post or page immediately instead of in draft state")
	parser.add_option('-r',  '--always-rest',   action='store_true',  dest="alwaysrest",
	                  help="Always treat plain text resource as ReStructuredText")
	parser.add_option('-u',  '--username',   dest="username",
	                  help="Blog username", metavar="USERNAME")
	parser.add_option('-p',  '--password',   dest="password",
	                  help="Blog password", metavar="PASSWORD")
	parser.add_option('-U',  '--url',   dest="url",
	                  help="Blog url", metavar="URL")
	parser.add_option('-v',  '--verbose',  action='store_true',  dest="verbose",
	                  help="Increase error and runtime messages", metavar="VERBOSE")
	(options, args) = parser.parse_args()
	
	# parser.error() prints the usage and the message on stderr and exits with
	# a non-zero status, so calling scripts can detect the failure
	if not options.url:
		parser.error("Please provide a blog url")
	if not options.username:
		parser.error("Please provide a blog username")
	if not options.password:
		parser.error("Please provide a blog password")

	fromfile = len(args) >= 1
	if fromfile:
		source = codecs.open(args[0], 'r',  'utf-8')
	else:
		# decode standard input the same way as a named file
		source = codecs.getreader('utf-8')(sys.stdin)

	try:
		fio = Postatore(source,  options.url,  options.username,  options.password,  options.verbose,  options.alwaysrest)
		try:
			fio.parseFile()
		except PostatoreException as problem:
			# sys.exit with a string prints it on stderr and exits with status 1
			sys.exit(str(problem))
	finally:
		# the resource has been read completely (or has failed) at this point;
		# standard input is left open for the interpreter to deal with
		if fromfile:
			source.close()
	
	if options.do_page:
		fio.addPage(options.publish)
	else:
		fio.addPost(options.tags,  options.categories,  options.publish)
	
# Run the command line interface only when executed as a script, not on import.
if "__main__" == __name__:
	main()
	
