手机
当前位置:查字典教程网 >编程开发 >ASP教程 >实例讲解ASP实现抓取网上房产信息
实例讲解ASP实现抓取网上房产信息
摘要:本文通过一个完整实例,讲解如何用 ASP(VBScript)借助 XMLHTTP 抓取网页上的房产信息,用正则表达式和字符串截取解析出各个字段,并将结果写入 Access 数据库。

<%@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>

<!--#include file="conn.asp"-->

<!--#include file="function.asp"-->

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">

<html>

<head>

<title>UntitledDocument</title>

<meta http-equiv="Content-Type" content="text/html; charset=gb2312">

<meta http-equiv="refresh" content="300;URL=steal_house.asp">

</head>

<body>

<%

' Best-effort scraping: a failing statement at page level is skipped instead
' of aborting the whole run.
On Error Resume Next

' Raise the script timeout (in seconds) because fetching many remote pages is slow.
Server.ScriptTimeout = 999999

'========================================================
' BytesToBstr - character-set decoding helper
'   body : binary data (the caller passes XMLHTTP.responseBody)
'   code : charset name used to decode the bytes, e.g. "gb2312"
'   Returns the bytes decoded as text.
' The data is written to an ADODB.Stream in binary mode and read back in
' text mode with the requested charset.
'========================================================
Function BytesToBstr(body, code)
    Dim objstream
    Set objstream = Server.CreateObject("adodb.stream")
    With objstream
        .Type = 1          ' 1 = adTypeBinary
        .Mode = 3          ' 3 = adModeReadWrite
        .Open
        .Write body
        .Position = 0      ' rewind before switching to text mode
        .Type = 2          ' 2 = adTypeText
        .Charset = code
        BytesToBstr = .ReadText
        .Close
    End With
    Set objstream = Nothing
End Function

' Newstring - case-insensitive position of strng inside wstr.
'   Returns the 1-based position of the first occurrence; when strng does
'   not occur, returns Len(wstr) instead of 0.
Function Newstring(wstr, strng)
    Newstring = InStr(LCase(wstr), LCase(strng))
    If Newstring <= 0 Then Newstring = Len(wstr)
End Function

' ReplaceStr - thin wrapper around the built-in Replace function.
'   ori  : source string
'   str1 : substring to search for
'   str2 : replacement text
'   Returns ori with every occurrence of str1 replaced by str2.
Function ReplaceStr(ori, str1, str2)
    ReplaceStr = Replace(ori, str1, str2)
End Function

'====================================================

' ReadXML - download a page and return the fragment between two markers.
'   url   : address to fetch with a synchronous GET
'   code  : charset used to decode the response body (e.g. "gb2312")
'   start : marker text; the result begins AT this marker (marker included)
'   ends  : marker text searched after start; the result stops just before it
'   Returns the fragment, or "" when either marker is not found.
' Positions are kept in local variables so the caller's start/ends arguments
' (ByRef by default in VBScript) are never overwritten.
Function ReadXML(url, code, start, ends)
    Dim oSend, pageText, startPos, endPos

    ReadXML = ""

    Set oSend = CreateObject("Microsoft.XMLHTTP")
    oSend.open "GET", url, False      ' False = synchronous request
    oSend.send
    pageText = BytesToBstr(oSend.responseBody, code)
    Set oSend = Nothing

    startPos = InStr(pageText, start)
    If startPos = 0 Then Exit Function
    pageText = Mid(pageText, startPos)

    endPos = InStr(pageText, ends)
    If endPos = 0 Then Exit Function

    ReadXML = Left(pageText, endPos - 1)
End Function

' SubStr - return the text of body that lies between two markers.
'   body  : string to search
'   start : marker text; the result begins right after its first occurrence
'   ends  : marker text searched after start; the result stops just before it
'   Returns the enclosed text, or "" when either marker is not found.
' The offset uses the length of the marker text itself; positions are kept
' in locals so the ByRef arguments are left untouched.
Function SubStr(body, start, ends)
    Dim startPos, endPos, tail

    SubStr = ""

    startPos = InStr(body, start)
    If startPos = 0 Then Exit Function

    tail = Mid(body, startPos + Len(start))

    endPos = InStr(tail, ends)
    If endPos = 0 Then Exit Function

    SubStr = Left(tail, endPos - 1)
End Function

' ---------------------------------------------------------------------------
' Main scraping loop.
' 1. Download the listing page and cut out the listing table.
' 2. Collect the <a> elements of that table.
' 3. For each link: download the detail page, strip HTML, parse the labelled
'    fields, print them, and insert a row into the "news" table unless the
'    record's serial number was already stored in the Application collection.
'
' NOTE(review): the published listing had every space removed, including the
' spaces inside string literals. The HTML markers and the SQL text below were
' restored with single spaces between attributes/keywords - confirm the
' markers against the real markup of the target site.
' ---------------------------------------------------------------------------
Dim getcont, NewsContent
Dim url, title
Dim KeyId, NewsClass, City, Position, HouseType, Level, Area, Price, Demostra
Dim ContactMan, Contact
Dim i, anchorHtml, tempLink, hrefPos, linkStart, linkEnd
Dim ifexit, appKey, rs

url = "http://www.***.com" ' address of the listing page (knowsky.com)

getcont = ReadXml(url, "gb2312", "<table class=k2 border=""0""", "</table>")
getcont = RegexHtml(getcont)

If IsArray(getcont) Then
    ' The loop starts at index 2, so the first two anchors are never processed
    ' (presumably non-listing links - verify against the target page).
    For i = 2 To UBound(getcont)
        anchorHtml = getcont(i)

        ' RegexHtml returns a padded array; unused slots are Empty and are skipped.
        If Len(anchorHtml) > 0 Then
            Response.Write(anchorHtml & "__<br>")

            ' Extract the href value: the text between href=" and " onClick.
            tempLink = ""
            hrefPos = InStr(anchorHtml, "href=""")
            linkEnd = InStr(anchorHtml, """ onClick")
            If hrefPos > 0 And linkEnd > hrefPos + 6 Then
                linkStart = hrefPos + 6
                tempLink = Mid(anchorHtml, linkStart, linkEnd - linkStart)
            End If
            tempLink = Replace(tempLink, "../", "")
            Response.Write(i & ":" & tempLink & "<br>")

            ' Reset per-item state so a failed fetch/parse never reuses the
            ' values of the previous item.
            NewsContent = ""
            KeyId = ""

            If Len(tempLink) > 0 Then
                NewsContent = ReadXml(tempLink, "gb2312", "<td valign=""bottom"" width=""400"">", "<hr width=""760"" noshade size=""1"" color=""#808080"">")
            End If

            NewsContent = RemoveHtml(NewsContent)
            ' Remove line breaks so the labelled fields sit on one line.
            NewsContent = Replace(NewsContent, Chr(13), "")
            NewsContent = Replace(NewsContent, Chr(10), "")
            ' NOTE(review): the published listing also contained three
            ' Replace(NewsContent, "", "") calls (no-ops, their search strings
            ' were lost) and Replace(NewsContent, "n", ""), which deletes every
            ' letter "n" from the content. They are omitted here; restore the
            ' intended search strings if the original source is available.

            '=============== get content =======================
            Response.Write(NewsContent)

            If Len(NewsContent) > 0 Then
                KeyId = SubStr(NewsContent, "列号:", "信息类别:")
                NewsClass = SubStr(NewsContent, "类别:", "所在城市:")
                City = SubStr(NewsContent, "城市:", "房屋具体位置:")
                Position = SubStr(NewsContent, "位置:", "房屋类型:")
                HouseType = SubStr(NewsContent, "类型:", "楼层:")
                Level = SubStr(NewsContent, "楼层:", "使用面积:")
                Area = SubStr(NewsContent, "面积:", "房价:")
                Price = SubStr(NewsContent, "房价:", "其他说明:")
                Demostra = SubStr(NewsContent, "说明:", "联系人:")
                ContactMan = SubStr(NewsContent, "联系人:", "联系方式:")
                Contact = SubStr(NewsContent, "联系方式:", "信息来源:")

                Response.Write("总序列号:" & KeyId & "<br>")
                Response.Write("信息类别:" & NewsClass & "<br>")
                Response.Write("所在城市:" & City & "<br>")
                Response.Write("房屋具体位置:" & Position & "<br>")
                Response.Write("房屋类型:" & HouseType & "<br>")
                Response.Write("楼层:" & Level & "<br>")
                Response.Write("使用面积:" & Area & "<br>")
                Response.Write("房价:" & Price & "<br>")
                Response.Write("其他说明:" & Demostra & "<br>")
                Response.Write("联系人:" & ContactMan & "<br>")
                Response.Write("联系方式:" & Contact & "<br>")
                'title = RemoveHTML(aa(i))
                'Response.Write("title:" & title)
            End If

            ' Only records with a serial number are considered for storage.
            If Len(KeyId) > 0 Then
                ' Duplicate check: has this serial number been stored already?
                ' The flag is reset for every item.
                ifexit = False
                For Each appKey In Application.Contents
                    If Application.Contents(appKey) = KeyId Then
                        ifexit = True
                        Exit For
                    End If
                Next

                If Not ifexit Then
                    ' Remember the serial number under a key built from the
                    ' current time and the loop index.
                    Application(Time & i) = KeyId

                    ' Add the record to the database.
                    '====================================================
                    Set rs = Server.CreateObject("adodb.recordset")
                    ' 3, 3 = adOpenStatic cursor, adLockOptimistic lock
                    rs.Open "select top 1 * from news order by id desc", conn, 3, 3
                    rs.AddNew
                    rs("NewsClass") = NewsClass
                    rs("City") = City
                    rs("Position") = Position
                    rs("HouseType") = HouseType
                    rs("Level") = Level
                    rs("Area") = Area
                    rs("Price") = Price
                    rs("Demostra") = Demostra
                    rs("ContactMan") = ContactMan
                    rs("Contact") = Contact
                    rs.Update
                    rs.Close
                    Set rs = Nothing
                End If
            End If
        End If
        '==================================================
    Next
End If

' RemoveTag - collect up to 15 <a>...</a> elements from body, with any
' <td>, </td>, <tr> and </tr> tags inside each element removed.
'   body : HTML text to scan
'   Returns a 16-slot array (indexes 0..15); matches fill slots 0..14 in
'   document order, remaining slots are Empty.
' All working variables are local so nothing leaks into the page's global scope.
Function RemoveTag(body)
    Dim regEx, Matches, Match, TempStr
    Dim i, arr(15)

    Set regEx = New RegExp
    regEx.Pattern = "<[a].*?</[a]>"
    regEx.IgnoreCase = True
    regEx.Global = True
    Set Matches = regEx.Execute(body)

    i = 0
    For Each Match In Matches
        TempStr = Match.Value
        TempStr = Replace(TempStr, "<td>", "")
        TempStr = Replace(TempStr, "</td>", "")
        TempStr = Replace(TempStr, "<tr>", "")
        TempStr = Replace(TempStr, "</tr>", "")
        arr(i) = TempStr
        i = i + 1
        ' Stop after 15 matches so the fixed-size array is never overrun.
        If i >= 15 Then Exit For
    Next

    Set regEx = Nothing
    Set Matches = Nothing
    RemoveTag = arr
End Function

' RegexHtml - return every <a ...>...</a> element found in body.
'   body : HTML text to scan (case-insensitive, all matches)
'   Returns an array of the matched elements in document order. The array
'   always has at least 48 slots (indexes 0..47), unused slots being Empty,
'   and grows when the page contains more than 48 anchors so that storing a
'   match can never run past the end of the array.
Function RegexHtml(body)
    Const MIN_UPPER_BOUND = 47
    Dim anchorRe, anchors, anchor
    Dim r_arr(), upper, idx

    Set anchorRe = New RegExp
    anchorRe.Pattern = "<a.*?</a>"
    anchorRe.IgnoreCase = True
    anchorRe.Global = True
    Set anchors = anchorRe.Execute(body)

    upper = anchors.Count - 1
    If upper < MIN_UPPER_BOUND Then upper = MIN_UPPER_BOUND
    ReDim r_arr(upper)

    idx = 0
    For Each anchor In anchors
        r_arr(idx) = anchor.Value
        idx = idx + 1
    Next

    RegexHtml = r_arr

    Set anchorRe = Nothing
    Set anchors = Nothing
End Function

'======================================================

' Close the database connection and release the object at the end of the page.
conn.Close
Set conn = Nothing

%>

</body>

</html>

function.asp

<%

'**************************************************
' Function: gotTopic
' Purpose : Truncate a string to a display width. A character whose
'           Abs(Asc()) value exceeds 255 (e.g. a Chinese character) counts
'           as 2, any other character counts as 1.
' Params  : str    - source string; the entities &nbsp; &quot; &gt; &lt; are
'                    decoded before measuring
'           strlen - display width at which the string is cut
' Returns : the string re-encoded with the same four entities; when the
'           accumulated width reaches strlen the text is cut after that
'           character and "…" is appended. "" when str is "".
' NOTE(review): the entity literals were restored from a listing in which a
' browser had decoded them; the Replace of "?" is reproduced as published.
'**************************************************
Function gotTopic(str, strlen)
    Dim text, total, width, code, pos

    If str = "" Then
        gotTopic = ""
        Exit Function
    End If

    ' Work on a local copy so the caller's variable (ByRef) is not modified.
    text = Replace(Replace(Replace(Replace(str, "&nbsp;", " "), "&quot;", Chr(34)), "&gt;", ">"), "&lt;", "<")
    text = Replace(text, "?", "")

    total = Len(text)
    width = 0
    gotTopic = text

    For pos = 1 To total
        code = Abs(Asc(Mid(text, pos, 1)))
        If code > 255 Then
            width = width + 2
        Else
            width = width + 1
        End If

        If width >= strlen Then
            gotTopic = Left(text, pos) & "…"
            Exit For
        End If
    Next

    gotTopic = Replace(Replace(Replace(Replace(gotTopic, " ", "&nbsp;"), Chr(34), "&quot;"), ">", "&gt;"), "<", "&lt;")
End Function

'=========================================================
' Function: RemoveHTML(strHTML)
' Purpose : Remove HTML tags from a string.
' Param   : strHTML - text that may contain HTML tags
' Returns : strHTML with every "<...>" sequence (pattern "<.+?>",
'           case-insensitive, all occurrences) deleted.
' The argument itself is not modified; the result is built by
' RegExp.Replace instead of assigning back to the ByRef parameter.
'=========================================================
Function RemoveHTML(strHTML)
    Dim objRegExp

    Set objRegExp = New RegExp
    objRegExp.IgnoreCase = True
    objRegExp.Global = True
    ' Match one closed <...> tag at a time (non-greedy).
    objRegExp.Pattern = "<.+?>"

    ' Delete every match in a single pass.
    RemoveHTML = objRegExp.Replace(strHTML, "")

    Set objRegExp = Nothing
End Function

%>

conn.asp

<%

'On Error Resume Next

' Open the Access database stest.mdb (resolved relative to this script)
' through the Microsoft Access ODBC driver. The connection is exposed to the
' including page as the global variable conn.
Set conn = Server.CreateObject("adodb.connection")
con = "driver={Microsoft Access Driver (*.mdb)};dbq=" & Server.MapPath("stest.mdb")
conn.Open con

' connclose - close the shared connection and release the object.
Sub connclose
    conn.Close
    Set conn = Nothing
End Sub

%>

【实例讲解ASP实现抓取网上房产信息】相关文章:

asp实现sql的备份与恢复

ASP,FSO遍历目录及目录下文件

ASP.NET实现单点登陆(SSO)适用于哪些情况?

实例演练ASP+XML编程比较全的了第1/2页

ASP实现URL编码

ASP开发网页牢记注意事项

用XML+FSO+JS实现服务器端文件的选择效果

用VB生成DLL封装ASP连接数据库的代码

[ASP]使用类,实现模块化

用ASP+DLL实现WEB方式修改服务器时间

精品推荐
分类导航