update xsoup version to release #113

pull/130/head
yihua.huang 11 years ago
parent a5d1b56e44
commit 95bdb30296

@@ -88,7 +88,7 @@
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>xsoup</artifactId>
-<version>0.2.4-SNAPSHOT</version>
+<version>0.2.4</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>

@@ -10,6 +10,7 @@ import us.codecraft.webmagic.selector.Selectable;
import java.io.FileNotFoundException;
import java.io.UnsupportedEncodingException;
import java.util.List;
/**
* @author code4crafer@gmail.com
@@ -20,11 +21,14 @@ public class MamacnPageProcessor implements PageProcessor {
@Override
public void process(Page page) {
Selectable images = page.getHtml().xpath("//ul[@id=ma-thumb-list]/li");
page.putField("img", images.xpath("//div[@class=picList]/div[@class=pre]/div[@class=npic]//img/@src").get());
page.putField("title", page.getHtml().xpath("//div[@class=picList]/div[@class=pre]/div[@class=npic]//img/@alt").get());
page.putField("url", page.getUrl().toString());
if (page.getResultItems().get("title") == null) {
List<Selectable> nodes = page.getHtml().xpath("//ul[@id=ma-thumb-list]/li").nodes();
StringBuilder accum = new StringBuilder();
for (Selectable node : nodes) {
accum.append("img:").append(node.xpath("//a/@href").get()).append("\n");
accum.append("title:").append(node.xpath("//img/@alt").get()).append("\n");
}
page.putField("",accum.toString());
if (accum.length() == 0) {
page.setSkip(true);
}
page.addTargetRequests(page.getHtml().links().regex("http://www\\.mama\\.cn/photo/.*\\.html").all());

Loading…
Cancel
Save