From a872a6480e3b1a7ce904b5e23e38652451b97111 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 25 Feb 2017 22:46:29 +0800 Subject: [PATCH] fix code sample for github #348 --- README.md | 4 ++-- .../webmagic/processor/example/GithubRepoPageProcessor.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 285eb609..f1ddd274 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ public class GithubRepoPageProcessor implements PageProcessor { public void process(Page page) { page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all()); page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString()); - page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString()); + page.putField("name", page.getHtml().xpath("//h1[@class='public']/strong/a/text()").toString()); if (page.getResultItems().get("name")==null){ //skip this page page.setSkip(true); @@ -89,7 +89,7 @@ You can also use annotation way: @HelpUrl("https://github.com/\\w+") public class GithubRepo { - @ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true) + @ExtractBy(value = "//h1[@class='public']/strong/a/text()", notNull = true) private String name; @ExtractByUrl("https://github\\.com/(\\w+)/.*") diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java index 955bd5a3..e93ab4cd 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java @@ -18,7 +18,7 @@ public class GithubRepoPageProcessor implements PageProcessor { page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all()); page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-])").all()); page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString()); - page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString()); + page.putField("name", page.getHtml().xpath("//h1[@class='public']/strong/a/text()").toString()); if (page.getResultItems().get("name")==null){ //skip this page page.setSkip(true);