|
|
|
@ -1393,12 +1393,13 @@ public class XpathSelectorTest {
|
|
|
|
|
public void htmlCleanerParseTest() {
|
|
|
|
|
Spider.create(new RuoxiaPageProcessor()).addUrl("http://www.ruoxia.com/top/dianji/month").thread(1).run();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
class RuoxiaPageProcessor implements PageProcessor {
|
|
|
|
|
@Override
|
|
|
|
|
public void process(Page page) {
|
|
|
|
|
List<Selectable> nodes = page.getHtml().xpath("//div[@class=\"bd\"]//tbody/tr").nodes();
|
|
|
|
|
for (Selectable node:nodes) {
|
|
|
|
|
String name = node.xpath("//td[3]/div/a[1]/text()").get();
|
|
|
|
|
List<String> items = new Xpath2Selector("//div[@class=\"bd\"]//tbody/tr").selectList(page.getRawText());
|
|
|
|
|
for (String item : items) {
|
|
|
|
|
String name = new Xpath2Selector("//td[3]/div/a[1]/text()").select(item);
|
|
|
|
|
System.out.println(name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|