手机
当前位置:查字典教程网 >编程开发 >php教程 >php实现的一个很好用HTML解析器类可用于采集数据
PHP实现的一个很好用的HTML解析器类,可用于采集数据
摘要:本文给出一个基于 DOMDocument 与 DOMXPath 的 PHP HTML 解析器类 XF_HtmlDom,可通过 XPath 查询节点、读取节点文本与属性值,适用于网页数据采集。

复制代码 代码如下:

<?php

// Make libxml buffer parse errors internally instead of emitting PHP warnings
// (scraped HTML is rarely well-formed). The previous setting is kept in
// $oldSetting; nothing in the visible code restores it.
$oldSetting = libxml_use_internal_errors( true );

// Discard any libxml errors that were already buffered.
libxml_clear_errors();

/**
 * -+-----------------------------------
 * |PHP5 Framework - 2011
 * |Web Site: www.iblue.cc
 * |E-mail: mejinke@gmail.com
 * |Date: 2012-10-12
 * -+-----------------------------------
 *
 * @desc HTML parser. Loads an HTML document from a URL or local path and
 *       exposes XPath based lookups. Each instance is either the document
 *       root (empty node path) or a wrapper around one matched node,
 *       identified by that node's absolute XPath.
 * @author jingke
 */
class XF_HtmlDom
{
    /** @var DOMXPath|null XPath evaluator bound to the loaded document. */
    private $_xpath = null;

    /** @var string Absolute XPath of the wrapped node; '' means the document root. */
    private $_nodePath = '';

    /**
     * @param DOMXPath|null $xpath    Evaluator of an already loaded document, if any.
     * @param string        $nodePath Absolute XPath of the node to wrap ('' for the root).
     */
    public function __construct($xpath = null, $nodePath = '')
    {
        $this->_xpath = $xpath;
        $this->_nodePath = $nodePath;
    }

    /**
     * Load and parse an HTML document.
     *
     * Sources starting with http:// or https:// are fetched with cURL; anything
     * else is read with file_get_contents(). After loading, this instance
     * represents the root of the new document.
     *
     * @param string $url Remote URL or local file path.
     * @return XF_HtmlDom This instance, so calls can be chained.
     * @throws RuntimeException When the source cannot be read or is empty.
     */
    public function loadHtml($url)
    {
        ini_set('user_agent', 'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 –Nexus');

        // Only a real http(s) scheme goes through cURL; a local path that merely
        // contains the substring "http" must still be read from disk.
        if (preg_match('#^https?://#i', $url)) {
            $content = $this->_fetchRemote($url);
        } else {
            $content = file_get_contents($url);
        }

        if ($content === false || trim($content) === '') {
            throw new RuntimeException('No HTML content could be loaded from: ' . $url);
        }

        // Real-world HTML is rarely valid; buffer libxml's complaints instead of
        // emitting warnings, then restore whatever the caller had configured.
        $previous = libxml_use_internal_errors(true);
        $html = new DOMDocument();
        $html->loadHTML($content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $this->_xpath = new DOMXPath($html);
        $this->_nodePath = '';

        return $this;
    }

    /**
     * Find nodes matching an XPath fragment.
     *
     * On the document root the fragment is searched anywhere in the document
     * ("//" . $query); on a node wrapper it is resolved relative to that node
     * (node path . "/" . $query). The wrapper's own path is never modified, so
     * find(), text() and getAttribute() can be called repeatedly in any order.
     *
     * @param string          $query XPath fragment, e.g. "td[@id='baidu']/a".
     * @param int|string|null $index Zero-based position of the single match to
     *                               return; null (or any non-numeric value)
     *                               returns every match.
     * @return XF_HtmlDom[]|XF_HtmlDom|null Array of wrappers when no index is
     *         given (empty if nothing matches); otherwise the wrapper at $index,
     *         or null when there is no such match.
     * @throws LogicException When no document has been loaded.
     */
    public function find($query, $index = null)
    {
        if ($this->_xpath === null) {
            throw new LogicException('No document loaded; call loadHtml() first.');
        }

        $prefix = ($this->_nodePath == '') ? '//' : $this->_nodePath . '/';

        // query() returns false (and PHP emits a warning) for a malformed
        // expression; that case is treated as "no match" below.
        $nodes = $this->_xpath->query($prefix . $query);

        if ($index === null || !is_numeric($index)) {
            $matches = array();
            if ($nodes !== false) {
                foreach ($nodes as $node) {
                    $matches[] = new self($this->_xpath, $node->getNodePath());
                }
            }
            return $matches;
        }

        $node = ($nodes !== false) ? $nodes->item((int) $index) : null;
        if ($node === null) {
            return null;
        }

        return new self($this->_xpath, $node->getNodePath());
    }

    /**
     * Get the text content of the wrapped node.
     *
     * @return string|false Text content, or false when this instance wraps no
     *                      node or the node can no longer be resolved.
     */
    public function text()
    {
        $node = $this->_currentNode();

        return ($node === null) ? false : $node->textContent;
    }

    /**
     * Get an attribute value of the wrapped node.
     *
     * @param string $name Attribute name.
     * @return string|false Attribute value ('' when the element lacks the
     *                      attribute), or false when this instance does not
     *                      wrap an element.
     */
    public function getAttribute($name)
    {
        $node = $this->_currentNode();

        // Text, comment and other non-element nodes have no getAttribute().
        if (!($node instanceof DOMElement)) {
            return false;
        }

        return $node->getAttribute($name);
    }

    /**
     * Property-style access: "innertext" yields text(); every other name is
     * looked up as an attribute of the wrapped node.
     *
     * @param string $name Property name.
     * @return string|false
     */
    public function __get($name)
    {
        if ($name == 'innertext') {
            return $this->text();
        }

        return $this->getAttribute($name);
    }

    /**
     * Download a remote document with cURL, following redirects.
     *
     * @param string $url http(s) URL.
     * @return string Response body.
     * @throws RuntimeException When the transfer fails.
     */
    private function _fetchRemote($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_REFERER, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:6.0) Gecko/20100101 Firefox/6.0');
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

        $content = curl_exec($ch);
        $error = curl_error($ch);
        curl_close($ch);

        if ($content === false) {
            throw new RuntimeException('Failed to fetch ' . $url . ': ' . $error);
        }

        return $content;
    }

    /**
     * Resolve the node this wrapper points at.
     *
     * @return DOMNode|null The node, or null for the document root, an unloaded
     *                      document, or a path that matches nothing.
     */
    private function _currentNode()
    {
        if ($this->_nodePath == '' || $this->_xpath == null) {
            return null;
        }

        $nodes = $this->_xpath->query($this->_nodePath);
        if ($nodes === false) {
            return null;
        }

        return $nodes->item(0);
    }
}

// Demo: fetch a page and print the text of the first link inside <td id="baidu">.
$xp = new XF_HtmlDom();

$xp->loadHtml('http://www.aizhan.com/siteall/www.opendir.cn/');

$link = $xp->find("td[@id='baidu']/a", 0);

// The remote markup may change; only read the text when a node was matched.
$rows = ($link instanceof XF_HtmlDom) ? $link->innertext : null;

print_r($rows);

【php实现的一个很好用HTML解析器类可用于采集数据】相关文章:

php实现求相对时间函数

实现树状结构的两种方法

php一个解析字符串排列数组的方法

PHP实现多线程的两个方法

Yii实现自动加载类地图的方法

PHP中的一些常用函数收集

一个用于mysql的数据库抽象层函数库

php实现比较两个字符串日期大小的方法

php实现文本数据导入SQL SERVER

php实现window平台的checkdnsrr函数

精品推荐
分类导航