Update: 提供测试用例。

pull/1107/head
hooyantsing 2 years ago
parent 12ce86425f
commit 08f4a4046b

@ -1393,12 +1393,13 @@ public class XpathSelectorTest {
public void htmlCleanerParseTest() { public void htmlCleanerParseTest() {
Spider.create(new RuoxiaPageProcessor()).addUrl("http://www.ruoxia.com/top/dianji/month").thread(1).run(); Spider.create(new RuoxiaPageProcessor()).addUrl("http://www.ruoxia.com/top/dianji/month").thread(1).run();
} }
class RuoxiaPageProcessor implements PageProcessor { class RuoxiaPageProcessor implements PageProcessor {
@Override @Override
public void process(Page page) { public void process(Page page) {
List<Selectable> nodes = page.getHtml().xpath("//div[@class=\"bd\"]//tbody/tr").nodes(); List<String> items = new Xpath2Selector("//div[@class=\"bd\"]//tbody/tr").selectList(page.getRawText());
for (Selectable node:nodes) { for (String item : items) {
String name = node.xpath("//td[3]/div/a[1]/text()").get(); String name = new Xpath2Selector("//td[3]/div/a[1]/text()").select(item);
System.out.println(name); System.out.println(name);
} }
} }
@ -1408,31 +1409,31 @@ public class XpathSelectorTest {
@Test @Test
public void performanceTest() { public void performanceTest() {
Xpath2Selector xpath2Selector = new Xpath2Selector("//a"); Xpath2Selector xpath2Selector = new Xpath2Selector("//a");
long time =System.currentTimeMillis(); long time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html); xpath2Selector.selectList(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
XpathSelector xpathSelector = new XpathSelector("//a"); XpathSelector xpathSelector = new XpathSelector("//a");
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
xpathSelector.selectList(html); xpathSelector.selectList(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html); xpath2Selector.selectList(html);
} }
System.out.println(System.currentTimeMillis() - time); System.out.println(System.currentTimeMillis() - time);
CssSelector cssSelector = new CssSelector("a"); CssSelector cssSelector = new CssSelector("a");
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
cssSelector.selectList(html); cssSelector.selectList(html);
} }
System.out.println("css "+(System.currentTimeMillis()-time)); System.out.println("css " + (System.currentTimeMillis() - time));
} }
@Ignore("take long time") @Ignore("take long time")
@ -1444,54 +1445,54 @@ public class XpathSelectorTest {
TagNode tagNode = htmlCleaner.clean(html); TagNode tagNode = htmlCleaner.clean(html);
Document document = Jsoup.parse(html); Document document = Jsoup.parse(html);
long time =System.currentTimeMillis(); long time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html); htmlCleaner.clean(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a"); tagNode.evaluateXPath("//a");
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
System.out.println("============="); System.out.println("=============");
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
Jsoup.parse(html); Jsoup.parse(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
document.select("a"); document.select("a");
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
System.out.println("============="); System.out.println("=============");
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html); htmlCleaner.clean(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a"); tagNode.evaluateXPath("//a");
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
System.out.println("============="); System.out.println("=============");
XPathEvaluator compile = Xsoup.compile("//a"); XPathEvaluator compile = Xsoup.compile("//a");
time =System.currentTimeMillis(); time = System.currentTimeMillis();
for (int i = 0; i < 2000; i++) { for (int i = 0; i < 2000; i++) {
compile.evaluate(document); compile.evaluate(document);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis() - time);
} }

Loading…
Cancel
Save