手机
当前位置:查字典教程网 >编程开发 >Java >java正则表达式解析html示例分享
java正则表达式解析html示例分享
摘要:复制代码 代码如下: package work; import java.io.BufferedReader; import java.io.IOExce...

复制代码 代码如下:

package work;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;

import org.apache.commons.httpclient.HttpClient;

import org.apache.commons.httpclient.HttpException;

import org.apache.commons.httpclient.HttpStatus;

import org.apache.commons.httpclient.methods.GetMethod;

import org.apache.commons.httpclient.params.HttpMethodParams;

public class chuanboyi {

public static void main(String[] args){

// TODO Auto-generated method stub

StringBuffer html = new StringBuffer();

HttpClient httpclient = new HttpClient();

//创建GET方法实例

GetMethod getMethod = new GetMethod("http://www.jb51.net");

//使用系统提供的默认恢复策略

getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());

try{

//执行GET方法

int statusCode = httpclient.executeMethod(getMethod);

if(statusCode != HttpStatus.SC_OK){

System.out.println("Method is wrong " + getMethod.getStatusLine());

}

InputStream responseBody = getMethod.getResponseBodyAsStream();

BufferedReader reader = new BufferedReader(new InputStreamReader(responseBody,"utf-8"));

String line = reader.readLine();

while(line != null){

html.append(line).append("n");

line = reader.readLine();

}

reader.close();

//正则表达式

String regex = "<form name="compareForm"[sS]+>[sS]+</form>.*<script.*>";

String regexa ="(?<=<li>)[sS]+?(?=</li>)";

Pattern pattern = Pattern.compile(regex);

Matcher m = pattern.matcher(html);

StringBuffer str = new StringBuffer();

int i = 0;

while(m.find()){

str.append(m.group());

}

pattern = Pattern.compile(regexa);

m = pattern.matcher(str);

while(m.find()){

attrs(m.group());

i++;

}

System.out.println("共有"+i+"条数据!");

}catch (HttpException e) {

// TODO: handle exception

System.out.println("Please check your provided http address!");

e.printStackTrace();

}catch (IOException e) {

// TODO: handle exception

System.out.println("the line is wrong!");

e.printStackTrace();

}finally{

getMethod.releaseConnection();//释放链接

}

}

public static void attrs(String str){

//获取url的正则表达式

String regexURL = "[a-z]+-[0-9]+.html";

//获取Name的正则表达式

String regexName = "(?<=title=")[[w-s][^x00-xff]]+(?=")";

//获取图片的正则表达式

String regexPicture = "images.*.jpg";

Pattern patternURL = Pattern.compile(regexURL);

Pattern patternName = Pattern.compile(regexName);

Pattern patternPicture = Pattern.compile(regexPicture);

Matcher mURL = patternURL.matcher(str);

Matcher mName = patternName.matcher(str);

Matcher mPicture = patternPicture.matcher(str);

if(mName.find()){

System.out.println("名字:"+mName.group());

}

if(mURL.find()){

System.out.println("链接:"+mURL.group());

}

if(mPicture.find()){

System.out.println("图片:"+mPicture.group());

}

}

}

【java正则表达式解析html示例分享】相关文章:

java字符串反转示例分享

java日期工具类实例分享

java配置dbcp连接池(数据库连接池)示例分享

java集合求和最大值最小值示例分享

java中使用sax解析xml的解决方法

利用Java正则表达式校验邮箱与手机号

java实现sunday算法示例分享

java控制台输出数字金字塔示例分享

java使用数组和链表实现队列示例

在java中使用dom解析xml的示例分析

精品推荐
分类导航