complete objectpipeline

pull/17/head
yihua.huang 12 years ago
parent 866ab0a056
commit f84b53514f

@ -0,0 +1,16 @@
package us.codecraft.webmagic.annotation;
import org.apache.commons.lang3.builder.ToStringBuilder;
import us.codecraft.webmagic.Task;
/**
 * A {@link PageModelPipeline} that prints every object it receives to standard output.
 * The printed text is built reflectively by
 * {@link ToStringBuilder#reflectionToString(Object)}.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-8-3 <br>
 * Time: 3:41 <br>
 */
public class ConsolePageModelPipeline implements PageModelPipeline {

    /**
     * Writes a reflection-based string form of {@code o} to {@code System.out}.
     *
     * @param o    the object to print
     * @param task the task passed along with the object; not used by this implementation
     */
    @Override
    public void process(Object o, Task task) {
        String rendered = ToStringBuilder.reflectionToString(o);
        System.out.println(rendered);
    }
}

@ -2,28 +2,57 @@ package us.codecraft.webmagic.annotation;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.pipeline.Pipeline;
/**
* @author code4crafter@gmail.com <br>
* @date: 13-8-3 <br>
* Time: 9:51 <br>
*/
public class OOSpider extends Spider{
public class OOSpider extends Spider {
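/**
 * Creates a Spider from the given page processor.
 * NOTE(review): the original non-English wording was lost to an encoding problem;
 * it mentioned OOSpider and ObjectPageProcessor - confirm the intended text.
 *
 * @param pageProcessor the processor passed to the Spider superclass constructor
 */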
public OOSpider(PageProcessor pageProcessor) {
super(pageProcessor);
private ObjectPageProcessor objectPageProcessor;
private ObjectPipeline objectPipeline;
/**
 * Builds an OOSpider around an existing {@link ObjectPageProcessor}.
 * The processor is handed to the superclass constructor and also kept in a field.
 * This constructor does not assign the {@code objectPipeline} field.
 *
 * @param objectPageProcessor the processor used by this spider
 */
protected OOSpider(ObjectPageProcessor objectPageProcessor) {
    super(objectPageProcessor);
    this.objectPageProcessor = objectPageProcessor;
}
public OOSpider(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) {
this(ObjectPageProcessor.create(site, pageModels));
this.objectPipeline = new ObjectPipeline();
super.pipeline(objectPipeline);
for (Class pageModel : pageModels) {
this.objectPipeline.put(pageModel, pageModelPipeline);
}
}
/**
 * Creates an OOSpider whose page models are all handled by a new
 * {@link ConsolePageModelPipeline}.
 *
 * @param site       the site to crawl
 * @param pageModels the page model classes to register
 * @return a new OOSpider
 */
public static OOSpider create(Site site, Class... pageModels) {
    PageModelPipeline consolePipeline = new ConsolePageModelPipeline();
    return new OOSpider(site, consolePipeline, pageModels);
}
/**
 * Creates an OOSpider whose page models are all handled by the given pipeline.
 *
 * @param site              the site to crawl
 * @param pageModelPipeline the pipeline registered for every class in {@code pageModels}
 * @param pageModels        the page model classes to register
 * @return a new OOSpider
 */
public static OOSpider create(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) {
    OOSpider spider = new OOSpider(site, pageModelPipeline, pageModels);
    return spider;
}
/**
 * Registers additional page model classes with this spider.
 * Each class is added to the page processor and mapped to {@code pageModelPipeline}
 * in the object pipeline.
 *
 * @param pageModelPipeline the pipeline registered for every class in {@code pageModels}
 * @param pageModels        the page model classes to register
 * @return this spider, for call chaining
 */
public OOSpider addPageModel(PageModelPipeline pageModelPipeline, Class... pageModels) {
    // The protected constructor does not assign objectPipeline. Create and install it
    // on first use (as the public constructor does) so that registering a model cannot
    // fail with a NullPointerException after the processor has already been modified.
    if (objectPipeline == null) {
        objectPipeline = new ObjectPipeline();
        // Call the superclass version directly: this class overrides pipeline(...) to throw.
        super.pipeline(objectPipeline);
    }
    for (Class pageModel : pageModels) {
        objectPageProcessor.addPageModel(pageModel);
        objectPipeline.put(pageModel, pageModelPipeline);
    }
    return this;
}
public static OOSpider create(Site site,Class... pageModels) {
OOSpider ooSpider = new OOSpider(ObjectPageProcessor.create(site, pageModels));
ooSpider.pipeline(new ObjectPipeline());
return ooSpider;
/**
 * Always rejects the call: this spider does not accept a caller-supplied pipeline.
 *
 * @param pipeline ignored
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
public Spider pipeline(Pipeline pipeline) {
    final String reason = "Sorry, OOSpider can only use ObjectPipeline";
    throw new UnsupportedOperationException(reason);
}
}

@ -18,30 +18,31 @@ import java.util.regex.Pattern;
*/
public class ObjectPageProcessor implements PageProcessor {
private List<PageModelExtractor> pageModelExtractorList;
private List<PageModelExtractor> pageModelExtractorList = new ArrayList<PageModelExtractor>();
private Site site;
private Set<Pattern> targetUrlPatterns;
private Set<Pattern> targetUrlPatterns = new HashSet<Pattern>();
public static ObjectPageProcessor create(Site site, Class... clazzs) {
List<PageModelExtractor> pageModelExtractorList = new ArrayList<PageModelExtractor>();
ObjectPageProcessor objectPageProcessor = new ObjectPageProcessor(site);
for (Class clazz : clazzs) {
PageModelExtractor pageModelExtractor = PageModelExtractor.create(clazz);
pageModelExtractorList.add(pageModelExtractor);
objectPageProcessor.addPageModel(clazz);
}
ObjectPageProcessor objectPageProcessor = new ObjectPageProcessor(site, pageModelExtractorList);
return objectPageProcessor;
}
private ObjectPageProcessor(Site site, List<PageModelExtractor> pageModelExtractorList) {
/**
 * Adds a page model class to this processor.
 * An extractor is created for the class; its target and help URL patterns are merged
 * into this processor's pattern set, and the extractor is appended to the extractor list.
 *
 * @param clazz the page model class to register
 * @return this processor, for call chaining
 */
public ObjectPageProcessor addPageModel(Class clazz) {
    PageModelExtractor extractor = PageModelExtractor.create(clazz);
    this.targetUrlPatterns.addAll(extractor.getTargetUrlPatterns());
    this.targetUrlPatterns.addAll(extractor.getHelpUrlPatterns());
    this.pageModelExtractorList.add(extractor);
    return this;
}
private ObjectPageProcessor(Site site) {
this.site = site;
this.pageModelExtractorList = pageModelExtractorList;
targetUrlPatterns = new HashSet<Pattern>();
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
targetUrlPatterns.addAll(pageModelExtractor.getTargetUrlPatterns());
targetUrlPatterns.addAll(pageModelExtractor.getHelpUrlPatterns());
}
}
@Override

@ -22,9 +22,9 @@ import java.util.regex.Pattern;
*/
class PageModelExtractor {
private List<Pattern> targetUrlPatterns;
private List<Pattern> targetUrlPatterns = new ArrayList<Pattern>();
private List<Pattern> helpUrlPatterns;
private List<Pattern> helpUrlPatterns = new ArrayList<Pattern>();
private Class clazz;
@ -106,7 +106,6 @@ class PageModelExtractor {
}
private void initTargetUrlPatterns() {
targetUrlPatterns = new ArrayList<Pattern>();
Annotation annotation = clazz.getAnnotation(TargetUrl.class);
if (annotation == null) {
targetUrlPatterns.add(Pattern.compile(".*"));
@ -116,7 +115,6 @@ class PageModelExtractor {
targetUrlPatterns.add(Pattern.compile(s.replace(".", "\\.").replace("*", "[^\"'#]*")));
}
}
helpUrlPatterns = new ArrayList<Pattern>();
annotation = clazz.getAnnotation(HelpUrl.class);
if (annotation != null) {
String[] value = ((HelpUrl) annotation).value();

@ -1,6 +1,5 @@
package us.codecraft.webmagic.annotation;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Site;
@ -11,13 +10,11 @@ import us.codecraft.webmagic.Site;
*/
public class TestFetcher {
@Ignore("takes long")
// @Ignore("takes long")
@Test
public void test() {
ObjectPipeline objectPipeline = new ObjectPipeline();
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class)
.pipeline(objectPipeline);
OschinaBlog oschinaBlog = null;
.run();
}

Loading…
Cancel
Save