手机
当前位置:查字典教程网 >脚本专栏 >python >使用python解析xml成对应的html示例分享
使用python解析xml成对应的html示例分享
摘要:使用SAX将ddt.xml解析成html。当然啦,如果得到了xml对应的xsl文件,可以直接用libxml2将其转换成html。复制代码 代码如下:#...

使用SAX将ddt.xml解析成html。当然啦,如果得到了xml对应的xsl文件,可以直接用libxml2将其转换成html。

复制代码 代码如下:

#!/usr/bin/env python

# -*- coding: utf-8 -*-

#---------------------------------------

# 程序:XML解析器

# 版本:01.0

# 作者:mupeng

# 日期:2013-12-18

# 语言:Python 2.7

# 功能:将xml解析成对应的html

# 注解:该程序用xml.sax模块的parse函数解析XML,并生成事件

# 继承ContentHandler并重写其事件处理函数

# Dispatcher主要用于相应标签的起始、结束事件的派发

#---------------------------------------

from xml.sax.handler import ContentHandler

from xml.sax import parse

class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    For an element named ``foo`` the start event is delivered to
    ``startFoo()`` and the end event to ``endFoo()``.  When no such method
    exists, ``defaultStart()`` / ``defaultEnd()`` is used instead, if
    defined.  Handlers are always called without arguments.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for the *prefix* event of element *name*.

        prefix -- event kind, e.g. ``'start'`` or ``'end'``.
        name   -- element name; it is normalised with ``str.capitalize``,
                  so ``pubDate`` maps to ``...Pubdate``.
        attrs  -- accepted for compatibility with ``startElement`` but not
                  forwarded, because handlers take no arguments.

        Nothing happens when neither the specific nor the default handler
        exists or is callable.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to the generic one.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback: dispatch the start of element *name*."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: dispatch the end of element *name*."""
        self.dispatch('end', name)

class Website(Dispatcher, ContentHandler):
    """SAX handler that renders an RSS feed as a GB2312-encoded HTML page.

    Element events arrive through ``Dispatcher`` as ``start<Tag>()`` /
    ``end<Tag>()`` calls.  Character data is accumulated in ``self.temp``
    and consumed by the matching ``end<Tag>`` handler.  The output file is
    opened in ``__init__`` and closed when the ``channel`` element ends.

    NOTE: feed text (titles, links, descriptions) is written into the page
    without HTML escaping.
    """

    # Encoding of the generated HTML file.
    ENCODING = 'gb2312'

    # Markup written right after the channel title: closes <head>, defines
    # the page's JavaScript helpers and opens the banner table.
    # ``\\/`` emits ``\/`` so the JavaScript regex literal reads ``/^\//``.
    _PAGE_HEADER_TAIL = u'''

</title>\n</head>\n<body>\n<center>\n

<script>\n

function copyLink()

{

clipboardData.setData("Text",window.location.href);

alert("RSS链接已经复制到剪贴板");

}

function subscibeLink()

{

var str = window.location.pathname;

while(str.match(/^\\//))

{

str = str.replace(/^\\//,"");

}

window.open("http://rss.sina.com.cn/my_sina_web_rss_news.html?url=" + str,"_self");

}

</script>\n

<table width="750" cellpadding="0" cellspacing="0">\n

<tr>\n

<td align="right" valign="bottom">\n

'''

    # Markup written when the channel ends: closes the item table and adds
    # the script that rewrites every <span> publication date as an age.
    _PAGE_FOOTER = u'''

<tr><td height="20"></td></tr>

</table>

</center>

<script>

function GetTimeDiff(str)

{

if(str == '')

{

return '';

}

var pubDate = new Date(str);

var nowDate = new Date();

var diffMilSeconds = nowDate.valueOf()-pubDate.valueOf();

var days = diffMilSeconds/86400000;

days = parseInt(days);

diffMilSeconds = diffMilSeconds-(days*86400000);

var hours = diffMilSeconds/3600000;

hours = parseInt(hours);

diffMilSeconds = diffMilSeconds-(hours*3600000);

var minutes = diffMilSeconds/60000;

minutes = parseInt(minutes);

diffMilSeconds = diffMilSeconds-(minutes*60000);

var seconds = diffMilSeconds/1000;

seconds = parseInt(seconds);

var returnStr = "北京发布时间:" + pubDate.toLocaleString();

if(days > 0)

{

returnStr = returnStr + "(距离现在" + days + "天" + hours + "小时" + minutes + "分钟)";

}

else if (hours > 0)

{

returnStr = returnStr + "(距离现在" + hours + "小时" + minutes + "分钟)";

}

else if (minutes > 0)

{

returnStr = returnStr + "(距离现在" + minutes + "分钟)";

}

return returnStr;

}

function GetSpanText()

{

var pubDate;

var pubDateArray;

var spanArray = document.getElementsByTagName("span");

for(var i = 0; i < spanArray.length; i++)

{

pubDate = spanArray[i].innerHTML;

document.getElementsByTagName("span")[i].innerHTML = GetTimeDiff(pubDate);

}

}

GetSpanText();

</script>

</body>

</html>

'''

    # Toolbar row and opening of the item table, written after the channel
    # description.
    _CHANNEL_TOOLBAR = u'''

</td></tr>

<tr bgcolor="#eeeeff"><td colspan="2"><b><a href="javascript:copyLink();">复制此页链接</a> <a href="javascript:subscibeLink();">我要嵌入该新闻列表到我的页面(简单、快速、实时、免费)</a></b></td></tr>

</table>

<table width="750" cellpadding="0" cellspacing="0">

'''

    def __init__(self, out_path='ddt_SAX.html'):
        """Open *out_path* for writing and reset all parsing state.

        out_path -- file that receives the generated HTML.
        """
        ContentHandler.__init__(self)
        # Binary mode: every write goes through _write(), which encodes.
        self.fout = open(out_path, 'wb')
        self.imagein = False    # True while inside an <image> element
        self.desflag = False    # not read anywhere in this class
        self.item = False       # True while inside an <item> element
        self.title = ''
        self.link = ''
        self.guid = ''
        self.url = ''
        self.pubdate = ''
        self.description = ''
        self.temp = ''          # character data of the current element
        self.prx = ''           # not read anywhere in this class

    # -- helpers ---------------------------------------------------------

    def _write(self, text):
        """Write *text* to the output file, encoding it when it is not bytes.

        Characters that ENCODING cannot represent are emitted as numeric
        character references, which HTML renders correctly.
        """
        if not isinstance(text, bytes):
            text = text.encode(self.ENCODING, 'xmlcharrefreplace')
        self.fout.write(text)

    def _take_text(self):
        """Return the buffered character data and empty the buffer."""
        text, self.temp = self.temp, ''
        return text

    # -- generic events --------------------------------------------------

    def characters(self, chars):
        """Buffer character data; whitespace-only chunks are ignored."""
        if chars.strip():
            self.temp += chars

    def defaultStart(self):
        """Start of an element without a dedicated handler: do nothing."""
        pass

    def defaultEnd(self):
        """End of an element without a dedicated handler: drop its text."""
        self.temp = ''

    # -- channel ---------------------------------------------------------

    def startChannel(self):
        """Begin the HTML document up to the opening of <title>."""
        self._write(u'<html>\n<head>\n<title> RSS-')

    def endChannel(self):
        """Write the page footer and close the output file."""
        self._write(self._PAGE_FOOTER)
        self.fout.close()

    # -- title -----------------------------------------------------------

    def startTitle(self):
        """Open the heading row for an item title."""
        if self.item:
            self._write(u'\n\n<tr bgcolor="#eeeeee">\n<td height="30">\n<B>\n\n')

    def endTitle(self):
        """Emit an item or channel title; an image title is discarded."""
        text = self._take_text()
        if self.item:
            self.title = text
            self._write(text)
            self._write(
                u'\n\n</B>\n\n</td>\n\n</tr>\n\n<tr bgcolor="#eeeeee">\n\n<td>\n\n'
            )
        elif not self.imagein:
            self.title = text
            self._write(text)
            self._write(self._PAGE_HEADER_TAIL)
        # Otherwise this is the <image> title.  It is not rendered, and
        # taking it out of the buffer above keeps it from being prepended
        # to the image <link> that follows.

    # -- image -----------------------------------------------------------

    def startImage(self):
        """Enter the <image> element."""
        self.imagein = True

    def endImage(self):
        """Leave the <image> element."""
        self.imagein = False

    # -- link ------------------------------------------------------------

    def startLink(self):
        """Open the anchor that wraps the channel image."""
        if self.imagein:
            self._write(u'<A href=" ')

    def endLink(self):
        """Store the link; render it for the image and for the channel.

        An item link is only stored here; it is written by endItem().
        """
        self.link = self._take_text()
        if self.imagein:
            self._write(self.link)
            self._write(u'" target="_blank">\n ')
        elif not self.item:
            # Channel link: finish the banner anchor opened by endUrl(),
            # then write the channel description and the toolbar.
            self._write(self.link)
            self._write(u' " target="_blank"> ')
            self._write(self.title)
            self._write(
                u' </A></B></td>\n\n</tr>\n\n'
                u'<tr><td colspan="2" align="center">\n\n'
            )
            self._write(self.description)
            self._write(self._CHANNEL_TOOLBAR)

    # -- url -------------------------------------------------------------

    def startUrl(self):
        """Open the <IMG> tag of the channel image."""
        if self.imagein:
            self._write(u'<IMG src=" ')

    def endUrl(self):
        """Store the URL; inside <image>, finish the <IMG> tag and open
        the anchor for the channel link."""
        self.url = self._take_text()
        if self.imagein:
            self._write(self.url)
            self._write(
                u'" border="0">\n\n\n</A>\n\n</td>\n\n'
                u'<td align="left" valign="bottom"><B><A href="\n\n'
            )

    # -- description / guid / pubDate --------------------------------------

    def startDescription(self):
        """Nothing to emit when a description starts."""
        pass

    def endDescription(self):
        """Store the description; write it immediately for an item."""
        self.description = self._take_text()
        if self.item:
            self._write(self.description)

    def endGuid(self):
        """Store the guid and clear the buffer so it cannot leak into the
        next element's text."""
        self.guid = self._take_text()

    def endPubdate(self):
        """Store the publication date; text starting with 'http' is
        rejected and the date left empty."""
        text = self._take_text()
        self.pubdate = '' if text.startswith('http') else text

    # -- item ------------------------------------------------------------

    def startItem(self):
        """Enter an <item> element."""
        self.item = True

    def endItem(self):
        """Close the item's rows: link (labelled with the guid) and date."""
        self.item = False
        self._write(
            u'\n\n</td>\n\n</tr>\n\n<tr bgcolor="#eeeeee">\n\n<td>\n\n<A href="'
        )
        self._write(self.link)
        self._write(u' " target="_blank"> ')
        self._write(self.guid)
        self._write(
            u'\n\n</A>\n\n</td>\n\n</tr>\n\n<tr bgcolor="#eeeeee">\n\n<td><span>'
        )
        self._write(self.pubdate)
        self._write(u'</span></td>\n\n</tr>\n\n<tr height="10"><td></td></tr>')

# Program entry point: parse ddt.xml and let Website render it to HTML.
if __name__ == '__main__':
    parse('ddt.xml', Website())

【使用python解析xml成对应的html示例分享】相关文章:

python连接池实现示例程序

pyqt4教程之widget使用示例分享

使用python 获取进程pid号的方法

python动态监控日志内容的示例

使用python装饰器验证配置文件示例

python list转dict示例分享

忘记ftp密码使用python ftplib库暴力破解密码的方法示例

python读文件逐行处理的示例代码分享

使用cx_freeze把python打包exe示例

python模拟登陆Tom邮箱示例分享

精品推荐
分类导航