From 08f4a4046b4cb13a81684533534a7d51640c3e04 Mon Sep 17 00:00:00 2001 From: hooyantsing Date: Fri, 3 Feb 2023 22:59:56 +0800 Subject: [PATCH] =?UTF-8?q?Update:=20=E6=8F=90=E4=BE=9B=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../webmagic/selector/XpathSelectorTest.java | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java index c2025e7c..8ac72193 100644 --- a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java +++ b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java @@ -1393,12 +1393,13 @@ public class XpathSelectorTest { public void htmlCleanerParseTest() { Spider.create(new RuoxiaPageProcessor()).addUrl("http://www.ruoxia.com/top/dianji/month").thread(1).run(); } + class RuoxiaPageProcessor implements PageProcessor { @Override public void process(Page page) { - List nodes = page.getHtml().xpath("//div[@class=\"bd\"]//tbody/tr").nodes(); - for (Selectable node:nodes) { - String name = node.xpath("//td[3]/div/a[1]/text()").get(); + List items = new Xpath2Selector("//div[@class=\"bd\"]//tbody/tr").selectList(page.getRawText()); + for (String item : items) { + String name = new Xpath2Selector("//td[3]/div/a[1]/text()").select(item); System.out.println(name); } } @@ -1408,31 +1409,31 @@ public class XpathSelectorTest { @Test public void performanceTest() { Xpath2Selector xpath2Selector = new Xpath2Selector("//a"); - long time =System.currentTimeMillis(); + long time = System.currentTimeMillis(); for (int i = 0; i < 1000; i++) { xpath2Selector.selectList(html); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); XpathSelector xpathSelector = new XpathSelector("//a"); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 1000; i++) { xpathSelector.selectList(html); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 1000; i++) { xpath2Selector.selectList(html); } System.out.println(System.currentTimeMillis() - time); CssSelector cssSelector = new CssSelector("a"); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 1000; i++) { cssSelector.selectList(html); } - System.out.println("css "+(System.currentTimeMillis()-time)); + System.out.println("css " + (System.currentTimeMillis() - time)); } @Ignore("take long time") @@ -1444,54 +1445,54 @@ public class XpathSelectorTest { TagNode tagNode = htmlCleaner.clean(html); Document document = Jsoup.parse(html); - long time =System.currentTimeMillis(); + long time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { htmlCleaner.clean(html); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { tagNode.evaluateXPath("//a"); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); System.out.println("============="); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { Jsoup.parse(html); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { document.select("a"); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); System.out.println("============="); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { htmlCleaner.clean(html); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { tagNode.evaluateXPath("//a"); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); System.out.println("============="); XPathEvaluator compile = Xsoup.compile("//a"); - time =System.currentTimeMillis(); + time = System.currentTimeMillis(); for (int i = 0; i < 2000; i++) { compile.evaluate(document); } - System.out.println(System.currentTimeMillis()-time); + System.out.println(System.currentTimeMillis() - time); }