手机
当前位置:查字典教程网 >编程开发 >Java >java正则表达式解析html示例分享
java正则表达式解析html示例分享
摘要:复制代码 代码如下:package work; import java.io.BufferedReader; import java.io.IOExce...

复制代码 代码如下:

package work;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;

import org.apache.commons.httpclient.HttpClient;

import org.apache.commons.httpclient.HttpException;

import org.apache.commons.httpclient.HttpStatus;

import org.apache.commons.httpclient.methods.GetMethod;

import org.apache.commons.httpclient.params.HttpMethodParams;

public class chuanboyi {

public static void main(String[] args){

// TODO Auto-generated method stub

StringBuffer html = new StringBuffer();

HttpClient httpclient = new HttpClient();

//创建GET方法实例

GetMethod getMethod = new GetMethod("http://www.jb51.net");

//使用系统提供的默认恢复策略

getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());

try{

//执行GET方法

int statusCode = httpclient.executeMethod(getMethod);

if(statusCode != HttpStatus.SC_OK){

System.out.println("Method is wrong " + getMethod.getStatusLine());

}

InputStream responseBody = getMethod.getResponseBodyAsStream();

BufferedReader reader = new BufferedReader(new InputStreamReader(responseBody,"utf-8"));

String line = reader.readLine();

while(line != null){

html.append(line).append("n");

line = reader.readLine();

}

reader.close();

//正则表达式

String regex = "<form name="compareForm"[sS]+>[sS]+</form>.*<script.*>";

String regexa ="(?<=<li>)[sS]+?(?=</li>)";

Pattern pattern = Pattern.compile(regex);

Matcher m = pattern.matcher(html);

StringBuffer str = new StringBuffer();

int i = 0;

while(m.find()){

str.append(m.group());

}

pattern = Pattern.compile(regexa);

m = pattern.matcher(str);

while(m.find()){

attrs(m.group());

i++;

}

System.out.println("共有"+i+"条数据!");

}catch (HttpException e) {

// TODO: handle exception

System.out.println("Please check your provided http address!");

e.printStackTrace();

}catch (IOException e) {

// TODO: handle exception

System.out.println("the line is wrong!");

e.printStackTrace();

}finally{

getMethod.releaseConnection();//释放链接

}

}

public static void attrs(String str){

//获取url的正则表达式

String regexURL = "[a-z]+-[0-9]+.html";

//获取Name的正则表达式

String regexName = "(?<=title=")[[w-s][^x00-xff]]+(?=")";

//获取图片的正则表达式

String regexPicture = "images.*.jpg";

Pattern patternURL = Pattern.compile(regexURL);

Pattern patternName = Pattern.compile(regexName);

Pattern patternPicture = Pattern.compile(regexPicture);

Matcher mURL = patternURL.matcher(str);

Matcher mName = patternName.matcher(str);

Matcher mPicture = patternPicture.matcher(str);

if(mName.find()){

System.out.println("名字:"+mName.group());

}

if(mURL.find()){

System.out.println("链接:"+mURL.group());

}

if(mPicture.find()){

System.out.println("图片:"+mPicture.group());

}

}

}

【java正则表达式解析html示例分享】相关文章:

利用Java正则表达式校验邮箱与手机号

简单的java读取文件示例分享

java中使用sax解析xml的解决方法

java dom4j解析xml文件代码实例分享

java实现sunday算法示例分享

java中vector与hashtable操作实例分享

java解析xml之jdom解析xml示例分享

java信号量控制线程打印顺序的示例分享

Java 正则表达式 解释说明

java向文件末尾添加内容示例分享

精品推荐
分类导航