xsoup test

pull/23/merge
yihua.huang 12 years ago
parent e87489d5dc
commit b1cba78bd6

@ -17,6 +17,11 @@
<artifactId>webmagic-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>xsoup</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>

@ -1,8 +1,15 @@
package us.codecraft.webmagic.selector;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.xsoup.XPathEvaluator;
import us.codecraft.xsoup.Xsoup;
/**
* @author code4crafter@gmail.com <br> Date: 13-4-21 Time: 10:06
@ -1353,6 +1360,7 @@ public class XpathSelectorTest {
Html html1 = new Html(html);
Assert.assertEquals("再次吐槽easyui", html1.xpath(".//*[@class='QTitle']/h1/a").toString());
Assert.assertNotNull(html1.$("a[href]").xpath("//@href").all());
Selectors.xpath("/abc/").select("");
}
@Test
@ -1379,17 +1387,86 @@ public class XpathSelectorTest {
xpath2Selector.selectList(html);
}
System.out.println(System.currentTimeMillis()-time);
XpathSelector xpathSelector = new XpathSelector("//a");
time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpathSelector.selectList(html);
}
System.out.println(System.currentTimeMillis()-time);
time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html);
}
System.out.println(System.currentTimeMillis() - time);
CssSelector cssSelector = new CssSelector("a");
time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
cssSelector.selectList(html);
}
System.out.println("css "+(System.currentTimeMillis()-time));
}
@Ignore("take long time")
@Test
public void parserPerformanceTest() throws XPatherException {
System.out.println(html.length());
HtmlCleaner htmlCleaner = new HtmlCleaner();
TagNode tagNode = htmlCleaner.clean(html);
Document document = Jsoup.parse(html);
long time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html);
}
System.out.println(System.currentTimeMillis()-time);
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a");
}
System.out.println(System.currentTimeMillis()-time);
System.out.println("=============");
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
Jsoup.parse(html);
}
System.out.println(System.currentTimeMillis()-time);
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
document.select("a");
}
System.out.println(System.currentTimeMillis()-time);
System.out.println("=============");
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html);
}
System.out.println(System.currentTimeMillis()-time);
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a");
}
System.out.println(System.currentTimeMillis()-time);
System.out.println("=============");
XPathEvaluator compile = Xsoup.compile("//a");
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
compile.evaluate(document);
}
System.out.println(System.currentTimeMillis()-time);
}
}

Loading…
Cancel
Save