htmlunit及xpath使用

Life is short , play more!
本文来自lihao's Blog,转载请注明。

下面的例子使用 HtmlUnit 和 XPath 来获取某个网页 HTML 中的数据，可用于编写爬虫抓取数据等场景。

package com.whoistester.test.report.module;
import java.util.List;

import com.gargoylesoftware.htmlunit.WebClient;

import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

public class Project {

String projectDescription;
String projectKey;
String buildNumber;
float rulesCompliance;
int blocker;
int critical;
int major;
int minor;
int info;
float unitTestsCoverage;
float lineCoverage;
float branchCoverage;
float unitTestSucess;
int testFailures;
int errors;
int tests;
String seconds;
float packageTangleIndex;
int cycles;
float methodComplexity;
float classComplexity;
float fileComplexity;
String host;
String url;

public Project(String projectKey, String projectDescription,String host)
{
this.projectKey = projectKey;
this.projectDescription = projectDescription;
this.host = host;
getData();
}

private void getData()
{

this.url = this.host+this.projectKey;

HtmlPage page = null;
WebClient webclient =null ;

try {
webclient = new WebClient();
page = webclient.getPage(this.url);

}

catch(Exception e)
{

}

if(null == page || webclient == null) return;

this.buildNumber = getElementStringText(page,”//div[@id=’snapshot_title’]/h4″);

this.rulesCompliance = getElementFloatText(page, “//span[@id=’m_violations_density’]”);

this.blocker = getElementIntText(page,”//span[@id=’m_blocker_violations’]”);
this.critical = getElementIntText(page,”//span[@id=’m_critical_violations’]”);
this.major = getElementIntText(page,”//span[@id=’m_major_violations’]”);
this.minor = getElementIntText(page,”//span[@id=’m_minor_violations’]”);
this.info = getElementIntText(page,”//span[@id=’m_info_violations’]”);
this.unitTestsCoverage = getElementFloatText(page, “//span[@id=’m_coverage’]”);
this.lineCoverage = getElementFloatText(page, “//span[@id=’m_line_coverage’]”);
this.branchCoverage = getElementFloatText(page, “//span[@id=’m_branch_coverage’]”);
this.tests = getElementIntText(page,”//span[@id=’m_tests’]”);
this.unitTestSucess = getElementFloatText(page, “//span[@id=’m_test_success_density’]”);

this.testFailures = getElementIntText(page,”//span[@id=’m_test_failures’]”);
this.errors = getElementIntText(page,”//span[@id=’m_test_errors’]”);
this.seconds = getElementStringText(page,”//span[@id=’m_test_execution_time’]”);

this.packageTangleIndex = getElementFloatText(page,”//span[@id=’m_package_tangle_index’]”);
this.cycles = getElementIntText(page,”//span[@id=’m_package_cycles’]”);

this.methodComplexity = getElementFloatText(page,”//span[@id=’m_function_complexity’]”);
this.classComplexity = getElementFloatText(page,”//span[@id=’m_class_complexity’]”);
this.fileComplexity = getElementFloatText(page,”//span[@id=’m_file_complexity’]”);

}

private float getElementFloatText(final HtmlPage page , final String xpath)
{
List temp ;
HtmlElement element;
temp = page.getByXPath(xpath);
if(temp.size()>0)
{
element = (HtmlElement) temp.get(0);
return Float.valueOf(element.getTextContent().replace(“%”, “”));
}
return -1;
}

private int getElementIntText(final HtmlPage page , final String xpath)
{
List temp ;
HtmlElement element;
temp = page.getByXPath(xpath);
if(temp.size()>0)
{
element = (HtmlElement) temp.get(0);
return Integer.valueOf(element.getTextContent().replace(“,”, “”));
}
return -1;
}

private String getElementStringText(final HtmlPage page , final String xpath)
{
List temp ;
HtmlElement element;
temp = page.getByXPath(xpath);
if(temp.size()>0)
{
element = (HtmlElement) temp.get(0);
return element.getTextContent();
}
return “”;
}

public static void main(String [] args)
{

//https://dev.eclipse.org/sonar/dashboard/index/

Project a = new Project(“10616″,”test”,”https://dev.eclipse.org/sonar/dashboard/index/”);

System.out.println(a.blocker+” “+a.critical + ” “+a.major);

}

}


发表评论

电子邮件地址不会被公开。 必填项已用*标注