From e7668e01b8d4666e025d5a2f101c08d2cfd482fe Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Thu, 21 Aug 2014 14:29:06 +0800 Subject: [PATCH] fix SourceRegion error and add some tests on it #144 --- .../codecraft/webmagic/selector/HtmlNode.java | 13 +++++++++++ .../webmagic/model/GithubRepoTest.java | 13 ++++++----- .../codecraft/webmagic/model/MockModel.java | 13 +++++++++++ .../model/ModelPageProcessorTest.java | 19 ++++++++++++++++ .../src/test/resouces/html/mock-webmagic.html | 22 +++++++++++++++++++ 5 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java create mode 100644 webmagic-extension/src/test/resouces/html/mock-webmagic.html diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java index e41267b9..d0dbfcd0 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java @@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable { return selectElements(xpathSelector); } + @Override + public Selectable selectList(Selector selector) { + if (selector instanceof BaseElementSelector) { + return selectElements((BaseElementSelector) selector); + } + return selectList(selector, getSourceTexts()); + } + + @Override + public Selectable select(Selector selector) { + return selectList(selector); + } + /** * select elements * diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java index d9501a27..1e9fd525 100644 --- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java +++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java @@ -1,12 +1,13 @@ package us.codecraft.webmagic.model; -import 
junit.framework.Assert; import org.junit.Test; -import us.codecraft.webmagic.downloader.MockGithubDownloader; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; +import us.codecraft.webmagic.downloader.MockGithubDownloader; import us.codecraft.webmagic.pipeline.PageModelPipeline; +import static org.assertj.core.api.Assertions.assertThat; + /** * @author code4crafter@gmail.com
*/ @@ -14,13 +15,13 @@ public class GithubRepoTest { @Test public void test() { - OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0) + OOSpider.create(Site.me().setSleepTime(0) , new PageModelPipeline() { @Override public void process(GithubRepo o, Task task) { - Assert.assertEquals(86, o.getStar()); - Assert.assertEquals(70, o.getFork()); + assertThat(o.getStar()).isEqualTo(86); + assertThat(o.getFork()).isEqualTo(70); } - }, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic"); + }, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic"); } } diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java new file mode 100644 index 00000000..65310534 --- /dev/null +++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java @@ -0,0 +1,13 @@ +package us.codecraft.webmagic.model; + +import us.codecraft.webmagic.model.annotation.HelpUrl; +import us.codecraft.webmagic.model.annotation.TargetUrl; + +/** + * @author code4crafter@gmail.com + */ +@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']") +@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']") +public class MockModel { + +} diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java index 74f3f6a5..7733d4c6 100644 --- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java +++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java @@ -1,5 +1,6 @@ package us.codecraft.webmagic.model; +import org.apache.commons.io.IOUtils; import 
org.junit.Test; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; @@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.TargetUrl; import us.codecraft.webmagic.selector.PlainText; +import java.io.IOException; + import static org.assertj.core.api.Assertions.assertThat; /** @@ -40,6 +43,22 @@ public class ModelPageProcessorTest { ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class); modelPageProcessor.process(page); assertThat(page.getResultItems().isSkip()).isFalse(); + } + + @Test + public void testExtractLinks() throws Exception { + ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, MockModel.class); + Page page = getMockPage(); + modelPageProcessor.process(page); + assertThat(page.getTargetRequests()).containsExactly(new Request("http://webmagic.io/list/1"), new Request("http://webmagic.io/list/2"), new Request("http://webmagic.io/post/1"), new Request("http://webmagic.io/post/2")); + + } + private Page getMockPage() throws IOException { + Page page = new Page(); + page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html"))); + page.setRequest(new Request("http://webmagic.io/list/0")); + page.setUrl(new PlainText("http://webmagic.io/list/0")); + return page; } } diff --git a/webmagic-extension/src/test/resouces/html/mock-webmagic.html b/webmagic-extension/src/test/resouces/html/mock-webmagic.html new file mode 100644 index 00000000..436e1e06 --- /dev/null +++ b/webmagic-extension/src/test/resouces/html/mock-webmagic.html @@ -0,0 +1,22 @@ + + + + + + + + + + + + \ No newline at end of file