diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/SinaBlogProcesser.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/SinaBlogProcesser.java
deleted file mode 100644
index dcb6eff9..00000000
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/SinaBlogProcesser.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package us.codecraft.webmagic.samples;
-
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.Spider;
-import us.codecraft.webmagic.processor.PageProcessor;
-
-/**
- * @author code4crafter@gmail.com
- */
-public class SinaBlogProcesser implements PageProcessor {
-
- private Site site;
-
- @Override
- public void process(Page page) {
- page.addTargetRequests(page.getHtml().xpath("//div[@class='articalfrontback SG_j_linedot1 clearfix']").links().all());
- page.putField("title", page.getHtml().xpath("//div[@class='articalTitle']/h2"));
- page.putField("content",page.getHtml().xpath("//div[@id='articlebody']//div[@class='articalContent']"));
- page.putField("id",page.getUrl().regex("http://blog\\.sina\\.com\\.cn/s/blog_(\\w+)"));
- page.putField("date",page.getHtml().xpath("//div[@id='articlebody']//span[@class='time SG_txtc']").regex("\\((.*)\\)"));
-// page.putField("tags",page.getHtml().xpath("//td[@class='blog_tag']/h3/a"));
- }
-
- @Override
- public Site getSite() {
- if (site==null){
- site = Site.me().setDomain("blog.sina.com.cn").addStartUrl("http://blog.sina.com.cn/s/blog_4701280b0102egl0.html").setSleepTime(3000).
- setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
- }
- return site;
- }
-
- public static void main(String[] args) {
- Spider.create(new SinaBlogProcesser()).run();
- }
-}
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/SinaBlogProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/SinaBlogProcessor.java
new file mode 100644
index 00000000..2872e02b
--- /dev/null
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/SinaBlogProcessor.java
@@ -0,0 +1,48 @@
+package us.codecraft.webmagic.samples;
+
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+/** Sample processor for blog.sina.com.cn: list pages feed the crawl queue, all other pages yield title, content and date.
+ * @author code4crafter@gmail.com
+ */
+public class SinaBlogProcessor implements PageProcessor {
+ /** Regex matching article-list page URLs; only the trailing number varies. */
+ public static final String URL_LIST = "http://blog\\.sina\\.com\\.cn/s/articlelist_1487828712_0_\\d+\\.html";
+ /** Regex matching individual post URLs ({@code blog_} followed by word characters, then {@code .html}). */
+ public static final String URL_POST = "http://blog\\.sina\\.com\\.cn/s/blog_\\w+\\.html";
+ /** Site settings handed out by {@link #getSite()}: domain, sleep time and user agent. */
+ private Site site = Site
+ .me()
+ .setDomain("blog.sina.com.cn")
+ .setSleepTime(3000)
+ .setUserAgent(
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
+ /** On a list page, queues post links and further list pages; on any other page, stores title, content and date fields. */
+ @Override
+ public void process(Page page) {
+ // list page: queue post links found inside the articleList div, plus any links to other list pages
+ if (page.getUrl().regex(URL_LIST).match()) {
+ page.addTargetRequests(page.getHtml().xpath("//div[@class=\"articleList\"]").links().regex(URL_POST).all());
+ page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
+ // article page (any URL that does not match URL_LIST)
+ } else {
+ page.putField("title", page.getHtml().xpath("//div[@class='articalTitle']/h2"));
+ page.putField("content", page.getHtml().xpath("//div[@id='articlebody']//div[@class='articalContent']"));
+ page.putField("date",
+ page.getHtml().xpath("//div[@id='articlebody']//span[@class='time SG_txtc']").regex("\\((.*)\\)"));
+ }
+ }
+ /** Returns the site settings built in the field initializer. */
+ @Override
+ public Site getSite() {
+ return site;
+ }
+ /** Runs a spider with this processor, starting from an article-list page URL. */
+ public static void main(String[] args) {
+ Spider.create(new SinaBlogProcessor()).addUrl("http://blog.sina.com.cn/s/articlelist_1487828712_0_1.html")
+ .run();
+ }
+}