|
|
@ -59,7 +59,7 @@ public class GithubRepoPageProcessor implements PageProcessor {
|
|
|
|
public void process(Page page) {
|
|
|
|
public void process(Page page) {
|
|
|
|
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all());
|
|
|
|
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all());
|
|
|
|
page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString());
|
|
|
|
page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString());
|
|
|
|
page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString());
|
|
|
|
page.putField("name", page.getHtml().xpath("//h1[@class='public']/strong/a/text()").toString());
|
|
|
|
if (page.getResultItems().get("name")==null){
|
|
|
|
if (page.getResultItems().get("name")==null){
|
|
|
|
//skip this page
|
|
|
|
//skip this page
|
|
|
|
page.setSkip(true);
|
|
|
|
page.setSkip(true);
|
|
|
@ -89,7 +89,7 @@ You can also use annotation way:
|
|
|
|
@HelpUrl("https://github.com/\\w+")
|
|
|
|
@HelpUrl("https://github.com/\\w+")
|
|
|
|
public class GithubRepo {
|
|
|
|
public class GithubRepo {
|
|
|
|
|
|
|
|
|
|
|
|
@ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true)
|
|
|
|
@ExtractBy(value = "//h1[@class='public']/strong/a/text()", notNull = true)
|
|
|
|
private String name;
|
|
|
|
private String name;
|
|
|
|
|
|
|
|
|
|
|
|
@ExtractByUrl("https://github\\.com/(\\w+)/.*")
|
|
|
|
@ExtractByUrl("https://github\\.com/(\\w+)/.*")
|
|
|
|