diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/Kr36NewsModel.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/Kr36NewsModel.java new file mode 100644 index 00000000..bba8d829 --- /dev/null +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/Kr36NewsModel.java @@ -0,0 +1,33 @@ +package us.codecraft.webmagic.model.samples; + +import us.codecraft.webmagic.Site; +import us.codecraft.webmagic.model.ConsolePageModelPipeline; +import us.codecraft.webmagic.model.OOSpider; +import us.codecraft.webmagic.model.annotation.ExtractBy; +import us.codecraft.webmagic.model.annotation.ExtractByUrl; +import us.codecraft.webmagic.model.annotation.HelpUrl; +import us.codecraft.webmagic.model.annotation.TargetUrl; + +/** + * @author code4crafter@gmail.com
+ * Date: 13-8-11
+ * Time: 9:29 PM
+ */ +@TargetUrl("http://www.36kr.com/p/\\d+.html") +@HelpUrl("http://www.36kr.com/#/page/\\d+") +public class Kr36NewsModel { + + @ExtractBy("//h1[@class='entry-title sep10']") + private String title; + + @ExtractBy("//div[@class='mainContent sep-10']") + private String content; + + @ExtractByUrl + private String url; + + public static void main(String[] args) { + OOSpider.create(Site.me().addStartUrl("http://www.36kr.com/"), new ConsolePageModelPipeline(), + Kr36NewsModel.class).run(); + } +}