update xsoup version to release #113

pull/130/head
yihua.huang 11 years ago
parent a5d1b56e44
commit 95bdb30296

@@ -88,7 +88,7 @@
<dependency> <dependency>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>xsoup</artifactId> <artifactId>xsoup</artifactId>
<version>0.2.4-SNAPSHOT</version> <version>0.2.4</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.alibaba</groupId> <groupId>com.alibaba</groupId>

@@ -10,6 +10,7 @@ import us.codecraft.webmagic.selector.Selectable;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.List;
/** /**
* @author code4crafer@gmail.com * @author code4crafer@gmail.com
@@ -20,11 +21,14 @@ public class MamacnPageProcessor implements PageProcessor {
@Override @Override
public void process(Page page) { public void process(Page page) {
Selectable images = page.getHtml().xpath("//ul[@id=ma-thumb-list]/li"); List<Selectable> nodes = page.getHtml().xpath("//ul[@id=ma-thumb-list]/li").nodes();
page.putField("img", images.xpath("//div[@class=picList]/div[@class=pre]/div[@class=npic]//img/@src").get()); StringBuilder accum = new StringBuilder();
page.putField("title", page.getHtml().xpath("//div[@class=picList]/div[@class=pre]/div[@class=npic]//img/@alt").get()); for (Selectable node : nodes) {
page.putField("url", page.getUrl().toString()); accum.append("img:").append(node.xpath("//a/@href").get()).append("\n");
if (page.getResultItems().get("title") == null) { accum.append("title:").append(node.xpath("//img/@alt").get()).append("\n");
}
page.putField("",accum.toString());
if (accum.length() == 0) {
page.setSkip(true); page.setSkip(true);
} }
page.addTargetRequests(page.getHtml().links().regex("http://www\\.mama\\.cn/photo/.*\\.html").all()); page.addTargetRequests(page.getHtml().links().regex("http://www\\.mama\\.cn/photo/.*\\.html").all());

Loading…
Cancel
Save