update pom

pull/17/head
yihua.huang 12 years ago
parent 3ba7a76f44
commit 7829c8fe02

@ -35,6 +35,7 @@
<modules> <modules>
<module>webmagic-core</module> <module>webmagic-core</module>
<module>webmagic-extension/</module> <module>webmagic-extension/</module>
<module>webmagic-samples</module>
</modules> </modules>
<dependencyManagement> <dependencyManagement>

@ -2,9 +2,9 @@ package us.codecraft.webmagic.model.samples;
import us.codecraft.webmagic.PagedModel; import us.codecraft.webmagic.PagedModel;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.*; import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ComboExtract;
import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.ExtractBy2;
import us.codecraft.webmagic.model.annotation.ExtractByUrl; import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl; import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.ConsolePipeline;
@ -16,8 +16,8 @@ import java.util.List;
/** /**
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* Date: 13-8-4 <br> * Date: 13-8-4 <br>
* Time: 8:17 <br> * Time: 8:17 <br>
*/ */
@TargetUrl("http://news.163.com/\\d+/\\d+/\\d+/\\w+*.html") @TargetUrl("http://news.163.com/\\d+/\\d+/\\d+/\\w+*.html")
public class News163 implements PagedModel { public class News163 implements PagedModel {
@ -28,8 +28,9 @@ public class News163 implements PagedModel {
@ExtractByUrl(value = "http://news\\.163\\.com/\\d+/\\d+/\\d+/\\w+_(\\d+)\\.html", notNull = false) @ExtractByUrl(value = "http://news\\.163\\.com/\\d+/\\d+/\\d+/\\w+_(\\d+)\\.html", notNull = false)
private String page; private String page;
@ExtractBy(value = "//div[@class=\"ep-pages\"]//a/@href", multi = true,notNull = false) @ComboExtract(value = {@ExtractBy("//div[@class=\"ep-pages\"]//a/@href"),
@ExtractBy2(value = "http://news\\.163\\.com/\\d+/\\d+/\\d+/\\w+_(\\d+)\\.html", type = ExtractBy2.Type.Regex) @ExtractBy(value = "http://news\\.163\\.com/\\d+/\\d+/\\d+/\\w+_(\\d+)\\.html", type = ExtractBy.Type.Regex)},
multi = true, notNull = false)
private List<String> otherPage; private List<String> otherPage;
@ExtractBy("//h1[@id=\"h1title\"]/text()") @ExtractBy("//h1[@id=\"h1title\"]/text()")

Loading…
Cancel
Save