Fix SourceRegion error and add tests for it (#144)

pull/157/head
yihua.huang 11 years ago
parent 4e5ba02020
commit e7668e01b8

@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable {
return selectElements(xpathSelector);
}
/**
 * Applies the given selector to this node.
 * <p>
 * A {@code BaseElementSelector} is evaluated through {@code selectElements}; any other
 * selector is applied to the texts returned by {@code getSourceTexts()}.
 *
 * @param selector the selector to apply
 * @return the selection result
 */
@Override
public Selectable selectList(Selector selector) {
    boolean elementBased = selector instanceof BaseElementSelector;
    if (!elementBased) {
        // Not element-aware: fall back to selecting on the source texts.
        return selectList(selector, getSourceTexts());
    }
    return selectElements((BaseElementSelector) selector);
}
/**
 * Applies the given selector by delegating to {@link #selectList(Selector)}.
 *
 * @param selector the selector to apply
 * @return whatever {@code selectList(selector)} returns
 */
@Override
public Selectable select(Selector selector) {
    Selectable selected = selectList(selector);
    return selected;
}
/**
* select elements
*

@ -1,12 +1,13 @@
package us.codecraft.webmagic.model;
import junit.framework.Assert;
import org.junit.Test;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.pipeline.PageModelPipeline;
import static org.assertj.core.api.Assertions.assertThat;
/**
* @author code4crafter@gmail.com <br>
*/
@ -14,13 +15,13 @@ public class GithubRepoTest {
/**
 * Runs an {@code OOSpider} for {@code GithubRepo} on
 * https://github.com/code4craft/webmagic using {@code MockGithubDownloader}, and checks in the
 * pipeline callback that the extracted model reports 86 stars and 70 forks.
 * <p>
 * NOTE(review): this listing shows the statements from before and after the change side by side
 * (start URL via {@code addStartUrl} vs. {@code addUrl}, JUnit {@code Assert} vs. AssertJ
 * {@code assertThat}); confirm which lines are live before editing.
 */
@Test
public void test() {
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0)
OOSpider.create(Site.me().setSleepTime(0)
, new PageModelPipeline<GithubRepo>() {
@Override
public void process(GithubRepo o, Task task) {
Assert.assertEquals(86, o.getStar());
Assert.assertEquals(70, o.getFork());
assertThat(o.getStar()).isEqualTo(86);
assertThat(o.getFork()).isEqualTo(70);
}
}, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
}, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
}
}

@ -0,0 +1,13 @@
package us.codecraft.webmagic.model;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
/**
 * Body-less page model that carries only URL annotations: a {@code TargetUrl} pattern for
 * post pages with source region {@code //li[@class='post']}, and a {@code HelpUrl} pattern for
 * list pages with source region {@code //li[@class='list']}.
 *
 * @author code4crafter@gmail.com
 */
@TargetUrl(
        value = "http://webmagic.io/post/\\d+",
        sourceRegion = "//li[@class='post']")
@HelpUrl(
        value = "http://webmagic.io/list/\\d+",
        sourceRegion = "//li[@class='list']")
public class MockModel {
}

@ -1,5 +1,6 @@
package us.codecraft.webmagic.model;
import org.apache.commons.io.IOUtils;
import org.junit.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.selector.PlainText;
import java.io.IOException;
import static org.assertj.core.api.Assertions.assertThat;
/**
@ -40,6 +43,22 @@ public class ModelPageProcessorTest {
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
modelPageProcessor.process(page);
assertThat(page.getResultItems().isSkip()).isFalse();
}
/**
 * Processes the mock page with a processor created for {@code MockModel} and expects the page's
 * target requests to be exactly list/1, list/2, post/1 and post/2, in that order.
 */
@Test
public void testExtractLinks() throws Exception {
    ModelPageProcessor processor = ModelPageProcessor.create(null, MockModel.class);
    Page mockPage = getMockPage();
    processor.process(mockPage);

    Request firstList = new Request("http://webmagic.io/list/1");
    Request secondList = new Request("http://webmagic.io/list/2");
    Request firstPost = new Request("http://webmagic.io/post/1");
    Request secondPost = new Request("http://webmagic.io/post/2");
    assertThat(mockPage.getTargetRequests())
            .containsExactly(firstList, secondList, firstPost, secondPost);
}
/**
 * Builds the page used by the link-extraction test from the classpath resource
 * {@code html/mock-webmagic.html}.
 * <p>
 * The resource is decoded explicitly as UTF-8 rather than with the platform default charset, and
 * the stream is always closed.
 *
 * @return a page whose raw text is the resource content and whose request and URL are
 *         {@code http://webmagic.io/list/0}
 * @throws IOException if the resource is not on the classpath or cannot be read
 */
private Page getMockPage() throws IOException {
    // Fully qualified so that no additional import is required by this method.
    java.io.InputStream fixture = getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html");
    if (fixture == null) {
        // getResourceAsStream returns null for a missing resource; fail with a readable message
        // instead of a NullPointerException from inside the copy.
        throw new IOException("classpath resource not found: html/mock-webmagic.html");
    }
    Page page = new Page();
    try {
        page.setRawText(IOUtils.toString(fixture, "UTF-8"));
    } finally {
        fixture.close();
    }
    page.setRequest(new Request("http://webmagic.io/list/0"));
    page.setUrl(new PlainText("http://webmagic.io/list/0"));
    return page;
}
}

@ -0,0 +1,22 @@
<!DOCTYPE html>
<html>
<head lang="zh">
<meta charset="UTF-8">
<title></title>
</head>
<body>
<!-- Link-extraction fixture (presumably the html/mock-webmagic.html loaded by ModelPageProcessorTest; confirm).
     Each list below mixes two links of its own kind with two links of the other kind. -->
<!-- "list" items: list/1 and list/2 are among the requests the test expects; post/3 and post/4 are not. -->
<ul>
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
<li class="list"><a href="http://webmagic.io/post/3"></a></li>
<li class="list"><a href="http://webmagic.io/post/4"></a></li>
</ul>
<!-- "post" items: post/1 and post/2 are among the requests the test expects; list/3 and list/4 are not. -->
<ul>
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
</ul>
</body>
</html>
Loading…
Cancel
Save