手机
当前位置:查字典教程网 >编程开发 >php教程 >php读取大文件示例分享(文件操作类)
php读取大文件示例分享(文件操作类)
摘要:本文分享两个用于逐行读取大日志文件的PHP文件操作类:Lib_File2.php(基于SplFileObject实现)和Lib_File1.php(基于fopen/fgets实现),代码如下。

Lib_File2.php

复制代码 代码如下:

<?php

/**
 * Reads large, pipe-delimited log files line by line through SplFileObject.
 *
 * A log file is located at <root>/<date>/<appid>.<op><suffix> and every line
 * has the form "timestamp|mid|json". The class is a singleton (see instance())
 * and caches one open SplFileObject per date/appid/op combination.
 *
 * Validation failures terminate the script with die(), as the public API of
 * this class has always done.
 */
class Lib_File2
{
    // Directory that contains the per-date log folders.
    private $root = '/data/wwwroot/kkpromo/data/';

    // File name suffix of the log files.
    private $suffix = '.log';

    // Open SplFileObject handles, keyed by "<date>_<appid>_<op>".
    private $handle = array();

    // Maximum number of lines read by one readFile() call.
    private $limit = 40000;

    // Maximum number of bytes read per line (longer lines are split).
    private $length = 1024;

    // microtime(true) of the latest open/scan start; base for "diffTime".
    private $startTime = 0;

    // memory_get_usage(true) at the latest instance() call; base for "memory".
    private static $startMemory = 0;

    // "Is open" flags, keyed like $handle.
    private $conn = array();

    // The shared singleton instance.
    private static $init = null;

    /**
     * Returns the shared instance and resets the memory baseline.
     *
     * @return Lib_File2
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);

        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }

        return self::$init;
    }

    private function __construct(){}

    /**
     * Sets the root directory; terminates the script if it is not a directory.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) die($root.' ROOT DOES NOT EXIST');

        $this->root = $root;
    }

    /**
     * Sets the log file suffix (for example ".log").
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Sets the maximum number of lines read per readFile() call.
     *
     * @param int $limit numeric value between 1 and 1000000
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) die($limit.' SHOULD BE NUMBERIC');

        if (intval($limit) > 1000000) die($limit.' SHOULD BE LOWER THAN 1000000');

        // A limit below 1 would make every read window empty.
        if (intval($limit) < 1) die($limit.' SHOULD BE GREATER THAN 0');

        $this->limit = intval($limit);
    }

    /**
     * Builds the path of a log file and verifies that it is a readable file.
     * Terminates the script when the check fails.
     *
     * @param string $date  e.g. "20140327"
     * @param int    $appid e.g. 1000
     * @param string $op    e.g. "show"
     * @return string absolute file name
     */
    public function _getFile($date , $appid , $op)
    {
        $filename = rtrim($this->root , '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

        if (!file_exists($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }

        if (!is_file($filename)) {
            die($filename.' FILE DOES NOT EXIST!');
        }

        if (!is_readable($filename)) {
            die($filename.' FILE ACCESS DENY!');
        }

        return $filename;
    }

    /**
     * Releases one cached handle (when all three arguments are given) or all
     * cached handles. Dropping the last reference to an SplFileObject closes
     * the underlying file.
     *
     * @return bool always true
     */
    public function closeFile($date=null , $appid=null , $op=null)
    {
        if ($op && $date && $appid) {
            $key = $date.'_'.$appid.'_'.$op;
            unset($this->handle[$key], $this->conn[$key]);
        } else {
            $this->handle = array();
            $this->conn = array();
        }

        return true;
    }

    /**
     * Returns the cached handle for a log file, opening it on first use, and
     * restarts the timer used for "diffTime".
     *
     * @return SplFileObject
     */
    private function _openFile($date , $appid , $op)
    {
        $this->startTime = microtime(true);

        $key = $date.'_'.$appid.'_'.$op;

        if (!empty($this->conn[$key]) && isset($this->handle[$key])) {
            return $this->handle[$key];
        }

        $filename = $this->_getFile($date , $appid , $op);

        // SplFileObject signals failure by throwing, never by returning null.
        try {
            $this->handle[$key] = new SplFileObject($filename , 'r');
        } catch (Exception $e) {
            unset($this->handle[$key]);
            die('FILE OPEN FAILED!');
        }

        $this->conn[$key] = true;

        return $this->handle[$key];
    }

    /**
     * Adds the elapsed time and memory delta to a result array.
     *
     * @param array $data
     * @return array $data plus "diffTime" (seconds) and "memory" ("<MiB> M")
     */
    private function _withStats($data)
    {
        $data['diffTime'] = microtime(true) - $this->startTime;

        $data['memory'] = ((memory_get_usage(true) - self::$startMemory)/1024/1024) . ' M';

        return $data;
    }

    /**
     * Parses one "timestamp|mid|json" line.
     *
     * @param string $data     raw line
     * @param bool   $jsonFlag true: decode the JSON payload into "data";
     *                         false: skip decoding and leave "data" empty
     * @return boolean|array false for malformed lines, otherwise
     *                       array('timestamp' => ..., 'mid' => ..., 'data' => array)
     */
    private function _parseData($data , $jsonFlag=true)
    {
        if (empty($data) || !is_string($data)) return false;

        $fields = explode('|', $data);

        if (count($fields) < 3) return false;

        if (empty($fields[0]) || empty($fields[1])) return false;

        $payload = array();

        if ($jsonFlag) {
            $decoded = json_decode($fields[2] , true);

            // Invalid or scalar JSON is treated as "no payload".
            if (is_array($decoded)) $payload = $decoded;
        }

        return array(
            'timestamp' => $fields[0],
            'mid'       => $fields[1],
            'data'      => $payload,
        );
    }

    /**
     * Tests a parsed record against a filter.
     *
     * Only the first key of $condition is evaluated (as this class has always
     * done); the record matches when its payload contains that field and the
     * value is in the allowed list. Comparison is loose because payload values
     * come from JSON and may differ in type from the caller's list.
     *
     * @param array $record         result of _parseData()
     * @param array $condition      array(field => array(allowed values))
     * @param bool  $allowEmptyList whether an empty allowed list matches any
     *                              record that has the field
     * @return bool
     */
    private function _matches($record , $condition , $allowEmptyList=false)
    {
        $fields = array_keys($condition);
        $field = $fields[0];

        if (!isset($record['data'][$field])) return false;

        $allowed = (array) $condition[$field];

        if (!$allowed) return $allowEmptyList;

        return in_array($record['data'][$field] , $allowed);
    }

    /**
     * Walks a window of lines and collects what the caller asks for.
     *
     * The loop tracks the line number itself and decides about termination
     * from eof() after each read, so blank lines and the end of the file can
     * never stall it.
     *
     * @param SplFileObject $handle
     * @param int           $startNum       zero-based first line
     * @param int|null      $length         number of lines, null = to end of file
     * @param bool          $jsonFlag       decode the JSON payload
     * @param array         $condition      filter, empty = accept every record
     * @param bool          $allowEmptyList see _matches()
     * @param string        $collect        'records', 'mids' or 'none'
     * @return array array('records' => line => record, 'mids' => mid => 1, 'count' => int)
     */
    private function _scan($handle , $startNum , $length , $jsonFlag , $condition , $allowEmptyList , $collect)
    {
        $out = array('records' => array() , 'mids' => array() , 'count' => 0);

        $endLine = ($length === null) ? null : $startNum + $length;

        // Set the line length before seeking so that line numbering is the
        // same while skipping and while reading.
        $handle->setMaxLineLen($this->length);
        $handle->seek($startNum);

        $lineNumber = $startNum;

        while ($endLine === null || $lineNumber < $endLine) {
            $line = $handle->current();

            // Once the read that produced $line hit EOF there is nothing left.
            $atEnd = $handle->eof();

            $record = is_string($line) ? $this->_parseData($line , $jsonFlag) : false;

            if ($record !== false && (!$condition || $this->_matches($record , $condition , $allowEmptyList))) {
                $out['count']++;

                if ($collect === 'records') {
                    $out['records'][$lineNumber] = $record;
                } elseif ($collect === 'mids') {
                    $out['mids'][$record['mid']] = 1;
                }
            }

            if ($atEnd) break;

            $handle->next();
            $lineNumber++;
        }

        return $out;
    }

    /**
     * Reads a window of lines from one log file.
     *
     * @param string $date      e.g. "20140327"
     * @param int    $appid     e.g. 1000
     * @param string $op        e.g. "show", "login", "index"
     * @param number $startNum  zero-based first line, default 0
     * @param number $length    number of lines to read, default $this->limit
     * @param bool   $jsonFlag  true: decode the JSON payload; false (default):
     *                          leave "data" empty and ignore $condition
     * @param array  $condition array(field => array(allowed values)); only the
     *                          first field is evaluated
     * @return array array('data' => array(lineNumber => record), 'count' => int,
     *               'diffTime' => float, 'memory' => string); malformed and
     *               blank lines are skipped
     */
    public function readFile($date , $appid , $op , $startNum=0 , $length=0 , $jsonFlag=false , $condition=array())
    {
        $startNum = empty($startNum) ? 0 : max(0 , intval($startNum));

        $length = empty($length) ? $this->limit : intval($length);

        // Filtering needs the decoded payload.
        if (!$jsonFlag || !is_array($condition)) $condition = array();

        $handle = $this->_openFile($date , $appid , $op);

        $scan = $this->_scan($handle , $startNum , $length , $jsonFlag , $condition , false , 'records');

        return $this->_withStats(array('data' => $scan['records'] , 'count' => $scan['count']));
    }

    /**
     * Counts the lines of a log file with the external "wc -l" command.
     *
     * @param string $date
     * @param int $appid
     * @param string $op
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string);
     *               count is 0 when the command produced no usable output
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);

        // Escape the path before handing it to the shell.
        $file = escapeshellarg($this->_getFile($date, $appid, $op));

        $output = shell_exec('wc -l '.$file);

        $count = 0;

        if (is_string($output) && preg_match('/^\s*(\d+)/', $output , $ret)) {
            $count = intval($ret[1]);
        }

        return $this->_withStats(array('count' => $count));
    }

    /**
     * Counts the well-formed records of a log file, optionally filtered.
     * The cached handle for the file is released afterwards.
     *
     * @param string $date
     * @param int $appid
     * @param string $op
     * @param array $condition array(field => array(allowed values)); only the
     *                         first field is evaluated
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function countFile($date , $appid , $op ,$condition=array())
    {
        if (!is_array($condition)) $condition = array();

        $handle = $this->_openFile($date , $appid , $op);

        // The payload only has to be decoded when there is something to filter on.
        $scan = $this->_scan($handle , 0 , null , (bool) $condition , $condition , false , 'none');

        unset($handle);

        $this->closeFile($date , $appid , $op);

        return $this->_withStats(array('count' => $scan['count']));
    }

    /**
     * Counts the distinct users (mid values) in a log file.
     *
     * The file is streamed once, so memory use depends on the number of
     * distinct mids, not on the file size.
     *
     * @param string $date
     * @param int $appid
     * @param string $op
     * @param bool $midflag   true: include the list of mids under "mid";
     *                        false (default): the "mid" key is omitted
     * @param bool $jsonFlag  kept for compatibility; the payload is decoded
     *                        whenever $condition requires it
     * @param array $condition array(field => array(allowed values)); only the
     *                         first field is evaluated, an empty list accepts
     *                         every record that has the field
     * @return array e.g. array('mid' => array(...), 'count' => 2181,
     *               'diffTime' => 0.0397, 'memory' => '3.75 M')
     */
    public function countFileMID($date , $appid , $op , $midflag=false , $jsonFlag=false, $condition=array())
    {
        if (!is_array($condition)) $condition = array();

        $handle = $this->_openFile($date , $appid , $op);

        $scan = $this->_scan($handle , 0 , null , ($jsonFlag || $condition) , $condition , true , 'mids');

        unset($handle);

        $this->closeFile($date , $appid , $op);

        $result = array(
            'mid'      => array_keys($scan['mids']),
            'count'    => count($scan['mids']),
            'diffTime' => 0,
            'memory'   => 0,
        );

        if (empty($midflag)) unset($result['mid']);

        return $this->_withStats($result);
    }

    /**
     * Runs countFileMID() for $date and the $day - 1 days before it.
     * The script terminates if the log file of any of those days is missing.
     *
     * @param string $date    newest day, e.g. "20140327"
     * @param int $appid
     * @param string $op
     * @param number $day     number of days to cover
     * @param bool $midflag   passed through to countFileMID()
     * @return array array('20140326' => countFileMID result, '20140325' => ...)
     */
    public function getReturnUser($date , $appid , $op , $day=1 , $midflag=false)
    {
        $result = array();

        $base = strtotime($date);

        if ($base === false) die($date.' INVALID DATE');

        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic instead of 86400-second steps, which skip or
            // repeat a day around daylight-saving changes.
            $dateKey = date('Ymd' , strtotime('-'.$i.' day' , $base));

            $result[$dateKey] = $this->countFileMID($dateKey , $appid , $op , $midflag);
        }

        return $result;
    }
}

?>

Lib_File1.php

复制代码 代码如下:

<?php

/**
 * Reads large, pipe-delimited log files line by line with fopen()/fgets().
 *
 * A log file is located at <root>/<date>/<appid>.<op><suffix> and every line
 * has the form "timestamp|mid|json". The class is a singleton (see instance())
 * and caches one open stream per date/appid/op combination.
 *
 * Validation failures terminate the script with die(), as the public API of
 * this class has always done.
 */
class Lib_File1
{
    // Directory that contains the per-date log folders.
    private $root = '/data/wwwroot/kkpromo/data/';

    // File name suffix of the log files.
    private $suffix = '.log';

    // Open stream resources, keyed by "<date>_<appid>_<op>".
    private $hander = array();

    // Default exclusive end line of a readFile() call.
    private $limit = 40000;

    // Buffer size passed to fgets(); longer lines are split.
    private $length = 1024;

    // microtime(true) of the latest open/scan start; base for "diffTime".
    private $startTime = 0;

    // memory_get_usage(true) at the latest instance() call; base for "memory".
    private static $startMemory = 0;

    // "Is open" flags, keyed like $hander.
    private $conn = array();

    // The shared singleton instance.
    private static $init = null;

    /**
     * Returns the shared instance and resets the memory baseline.
     *
     * @return Lib_File1
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);

        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }

        return self::$init;
    }

    private function __construct(){}

    /**
     * Sets the root directory; terminates the script if it is not a directory.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) die($root.' ROOT DOES NOT EXIST');

        $this->root = $root;
    }

    /**
     * Sets the log file suffix (for example ".log").
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Sets the default exclusive end line used by readFile().
     *
     * @param int $limit numeric value between 1 and 1000000
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) die($limit.' SHOULD BE NUMBERIC');

        if (intval($limit) > 1000000) die($limit.' SHOULD BE LOWER THAN 1000000');

        // A limit below 1 would make every read window empty.
        if (intval($limit) < 1) die($limit.' SHOULD BE GREATER THAN 0');

        $this->limit = intval($limit);
    }

    /**
     * Builds the path of a log file and verifies that it is a readable file.
     * Terminates the script when the check fails.
     *
     * @param string $date  e.g. "20140327"
     * @param int    $appid e.g. 1000
     * @param string $op    e.g. "show"
     * @return string absolute file name
     */
    private function _getFile($date , $appid , $op)
    {
        $filename = rtrim($this->root , '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

        if (!file_exists($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }

        if (!is_file($filename)) {
            die($filename.' FILE DOES NOT EXIST!');
        }

        if (!is_readable($filename)) {
            die($filename.' FILE ACCESS DENY!');
        }

        return $filename;
    }

    /**
     * Closes one cached stream and forgets it.
     *
     * @param string $key "<date>_<appid>_<op>"
     */
    private function _release($key)
    {
        // Only real, still-open streams may be passed to fclose().
        if (isset($this->hander[$key]) && is_resource($this->hander[$key])) {
            fclose($this->hander[$key]);
        }

        unset($this->hander[$key], $this->conn[$key]);
    }

    /**
     * Closes one cached stream (when all three arguments are given) or all
     * cached streams.
     *
     * @return bool always true
     */
    public function closeFile($date=null , $appid=null , $op=null)
    {
        if ($op && $date && $appid) {
            $this->_release($date.'_'.$appid.'_'.$op);
        } else {
            foreach (array_keys($this->hander) as $key) {
                $this->_release($key);
            }

            $this->conn = array();
        }

        return true;
    }

    /**
     * Returns the cached stream for a log file, opening it on first use, and
     * restarts the timer used for "diffTime".
     *
     * @return resource
     */
    private function _openFile($date , $appid , $op)
    {
        $this->startTime = microtime(true);

        $key = $date.'_'.$appid.'_'.$op;

        if (!empty($this->conn[$key]) && isset($this->hander[$key]) && is_resource($this->hander[$key])) {
            return $this->hander[$key];
        }

        $filename = $this->_getFile($date , $appid , $op);

        $stream = fopen($filename , 'r');

        if ($stream === false) {
            die('FILE OPEN FAILED!');
        }

        $this->hander[$key] = $stream;
        $this->conn[$key] = true;

        return $stream;
    }

    /**
     * Adds the elapsed time and memory delta to a result array.
     *
     * @param array $data
     * @return array $data plus "diffTime" (seconds) and "memory" ("<MiB> M")
     */
    private function _withStats($data)
    {
        $data['diffTime'] = microtime(true) - $this->startTime;

        $data['memory'] = ((memory_get_usage(true) - self::$startMemory)/1024/1024) . ' M';

        return $data;
    }

    /**
     * Parses one "timestamp|mid|json" line.
     *
     * @param string $data     raw line
     * @param bool   $jsonFlag true: decode the JSON payload into "data";
     *                         false: skip decoding and leave "data" empty
     * @return boolean|array false for malformed lines, otherwise
     *                       array('timestamp' => ..., 'mid' => ..., 'data' => array)
     */
    private function _parseData($data , $jsonFlag=true)
    {
        if (empty($data) || !is_string($data)) return false;

        $fields = explode('|', $data);

        if (count($fields) < 3) return false;

        if (empty($fields[0]) || empty($fields[1])) return false;

        $payload = array();

        if ($jsonFlag) {
            $decoded = json_decode($fields[2] , true);

            // Invalid or scalar JSON is treated as "no payload".
            if (is_array($decoded)) $payload = $decoded;
        }

        return array(
            'timestamp' => $fields[0],
            'mid'       => $fields[1],
            'data'      => $payload,
        );
    }

    /**
     * Tests a parsed record against a filter.
     *
     * The record matches as soon as one field of $condition is present in the
     * payload with a value from that field's allowed list. Comparison is loose
     * because payload values come from JSON and may differ in type from the
     * caller's list.
     *
     * @param array $record         result of _parseData()
     * @param array $condition      array(field => array(allowed values), ...)
     * @param bool  $allowEmptyList whether an empty allowed list matches any
     *                              record that has the field
     * @return bool
     */
    private function _matches($record , $condition , $allowEmptyList=false)
    {
        foreach ($condition as $field => $allowed) {
            if (!isset($record['data'][$field])) continue;

            $allowed = (array) $allowed;

            if (!$allowed) {
                if ($allowEmptyList) return true;
                continue;
            }

            if (in_array($record['data'][$field] , $allowed)) return true;
        }

        return false;
    }

    /**
     * Walks the file from the top and collects what the caller asks for.
     *
     * Every physical line advances the line counter, whether or not it can be
     * parsed, and the loop stops as soon as fgets() reports the end of file.
     *
     * @param resource $hander
     * @param int      $startNum       zero-based first line of the window
     * @param int|null $endNum         exclusive end line, null = to end of file
     * @param bool     $jsonFlag       decode the JSON payload
     * @param array    $condition      filter, empty = accept every record
     * @param bool     $allowEmptyList see _matches()
     * @param string   $collect        'records', 'mids' or 'none'
     * @return array array('records' => index => record, 'mids' => mid => 1, 'count' => int)
     */
    private function _scan($hander , $startNum , $endNum , $jsonFlag , $condition , $allowEmptyList , $collect)
    {
        $out = array('records' => array() , 'mids' => array() , 'count' => 0);

        // The cached stream may be positioned anywhere after an earlier call.
        rewind($hander);

        $lineNo = 0;
        $index = $startNum;

        while (($endNum === null || $lineNo < $endNum) && ($line = fgets($hander , $this->length)) !== false) {
            $current = $lineNo++;

            if ($current < $startNum) continue;

            $record = $this->_parseData($line , $jsonFlag);

            if ($record === false) continue;

            if ($condition && !$this->_matches($record , $condition , $allowEmptyList)) continue;

            $out['count']++;

            if ($collect === 'records') {
                $out['records'][$index++] = $record;
            } elseif ($collect === 'mids') {
                $out['mids'][$record['mid']] = 1;
            }
        }

        return $out;
    }

    /**
     * Reads a window of lines from one log file.
     *
     * @param string $date      e.g. "20140327"
     * @param int    $appid     e.g. 1000
     * @param string $op        e.g. "show", "login", "index"
     * @param number $startNum  zero-based first line, default 0
     * @param number $endNum    exclusive end line, default $this->limit
     * @param bool   $jsonFlag  true: decode the JSON payload; false (default):
     *                          leave "data" empty and ignore $condition
     * @param array  $condition array(field => array(allowed values), ...); a
     *                          record is kept when any field matches
     * @return array array('data' => array(index => record), 'count' => int,
     *               'diffTime' => float, 'memory' => string); indexes start at
     *               $startNum and increase by one per returned record,
     *               malformed and blank lines are skipped
     */
    public function readFile($date , $appid , $op ,$startNum=0 , $endNum=0 , $jsonFlag=false , $condition=array())
    {
        $startNum = empty($startNum) ? 0 : max(0 , intval($startNum));

        $endNum = empty($endNum) ? $this->limit : intval($endNum);

        // Filtering needs the decoded payload.
        if (!$jsonFlag || !is_array($condition)) $condition = array();

        $hander = $this->_openFile($date , $appid , $op);

        $scan = $this->_scan($hander , $startNum , $endNum , $jsonFlag , $condition , false , 'records');

        return $this->_withStats(array('data' => $scan['records'] , 'count' => $scan['count']));
    }

    /**
     * Counts the lines of a log file with the external "wc -l" command.
     *
     * @param string $date
     * @param int $appid
     * @param string $op
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string);
     *               count is 0 when the command produced no usable output
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);

        // Escape the path before handing it to the shell.
        $file = escapeshellarg($this->_getFile($date, $appid, $op));

        $output = shell_exec('wc -l '.$file);

        $count = 0;

        if (is_string($output) && preg_match('/^\s*(\d+)/', $output , $ret)) {
            $count = intval($ret[1]);
        }

        return $this->_withStats(array('count' => $count));
    }

    /**
     * Counts the well-formed records of a log file, optionally filtered.
     * Each record is counted once, however many condition fields it matches.
     *
     * @param string $date
     * @param int $appid
     * @param string $op
     * @param array $condition array(field => array(allowed values), ...)
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function countFile($date , $appid , $op ,$condition=array())
    {
        if (!is_array($condition)) $condition = array();

        $hander = $this->_openFile($date , $appid , $op);

        // The payload only has to be decoded when there is something to filter on.
        $scan = $this->_scan($hander , 0 , null , (bool) $condition , $condition , false , 'none');

        return $this->_withStats(array('count' => $scan['count']));
    }

    /**
     * Counts the distinct users (mid values) in a log file.
     *
     * The file is streamed once, so the result does not depend on "wc" and
     * memory use depends on the number of distinct mids, not on the file size.
     * The cached stream for the file is closed afterwards.
     *
     * @param string $date
     * @param int $appid
     * @param string $op
     * @param bool $midflag   true: include the list of mids under "mid";
     *                        false (default): the "mid" key is omitted
     * @param bool $jsonFlag  kept for compatibility; the payload is decoded
     *                        whenever $condition requires it
     * @param array $condition array(field => array(allowed values), ...); an
     *                         empty list accepts every record that has the field
     * @return array e.g. array('mid' => array(...), 'count' => 2181,
     *               'diffTime' => 0.0397, 'memory' => '3.75 M')
     */
    public function countFileMID($date , $appid , $op , $midflag=false , $jsonFlag=false, $condition=array())
    {
        if (!is_array($condition)) $condition = array();

        $hander = $this->_openFile($date , $appid , $op);

        $scan = $this->_scan($hander , 0 , null , ($jsonFlag || $condition) , $condition , true , 'mids');

        $this->closeFile($date , $appid , $op);

        $result = array(
            'mid'      => array_keys($scan['mids']),
            'count'    => count($scan['mids']),
            'diffTime' => 0,
            'memory'   => 0,
        );

        if (empty($midflag)) unset($result['mid']);

        return $this->_withStats($result);
    }

    /**
     * Runs countFileMID() for $date and the $day - 1 days before it.
     * The script terminates if the log file of any of those days is missing.
     *
     * @param string $date    newest day, e.g. "20140327"
     * @param int $appid
     * @param string $op
     * @param number $day     number of days to cover
     * @param bool $midflag   passed through to countFileMID()
     * @return array array('20140326' => countFileMID result, '20140325' => ...)
     */
    public function getReturnUser($date , $appid , $op , $day=1 , $midflag=false)
    {
        $result = array();

        $base = strtotime($date);

        if ($base === false) die($date.' INVALID DATE');

        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic instead of 86400-second steps, which skip or
            // repeat a day around daylight-saving changes.
            $dateKey = date('Ymd' , strtotime('-'.$i.' day' , $base));

            $result[$dateKey] = $this->countFileMID($dateKey , $appid , $op , $midflag);
        }

        return $result;
    }
}

?>

【php读取大文件示例分享(文件操作类)】相关文章:

php对文件进行hash运算的方法

php备份数据库类分享

用文本文件制作留言板提示(下)

php环境上传大文件需要注意的事项

php删除文本文件中重复行的方法

为php4加入动态flash文件的生成的支持

用文本文件制作留言板提示(上)

php实现读取和写入tab分割的文件

php操作MongoDB类实例

PHP SPL标准库之文件操作

精品推荐
分类导航