Fix NullPointerException

pull/17/head
yihua.huang 12 years ago
parent b0af45f4bb
commit 4eb3d60083

@ -51,14 +51,14 @@ class ModelPageProcessor implements PageProcessor {
@Override @Override
public void process(Page page) { public void process(Page page) {
for (PageModelExtractor pageModelExtractor : pageModelExtractorList) { for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
Object process = pageModelExtractor.process(page); Object process = pageModelExtractor.process(page);
if (process == null || (process instanceof List && ((List) process).size() == 0)) { if (process == null || (process instanceof List && ((List) process).size() == 0)) {
page.getResultItems().setSkip(true); page.getResultItems().setSkip(true);
} }
postProcessPageModel(pageModelExtractor.getClazz(), process); postProcessPageModel(pageModelExtractor.getClazz(), process);
page.putField(pageModelExtractor.getClazz().getCanonicalName(), process); page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns());
extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns());
} }
} }

@ -56,18 +56,20 @@ public class RedisScheduler implements Scheduler {
public synchronized Request poll(Task task) { public synchronized Request poll(Task task) {
Jedis jedis = pool.getResource(); Jedis jedis = pool.getResource();
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID()); String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
if (url == null) {
return null;
}
String key = ITEM_PREFIX + DigestUtils.shaHex(url); String key = ITEM_PREFIX + DigestUtils.shaHex(url);
byte[] bytes = jedis.get(key.getBytes()); byte[] bytes = jedis.get(key.getBytes());
try { if (bytes!=null){
Object o = HessianSerializer.INSTANCE.deSerialize(bytes); try {
return (Request)o; Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
} catch (Exception e) { return (Request)o;
e.printStackTrace(); } catch (Exception e) {
e.printStackTrace();
}
} }
pool.returnResource(jedis); pool.returnResource(jedis);
if (url == null) {
return null;
}
return new Request(url); return new Request(url);
} }
} }

@ -5,6 +5,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.*; import us.codecraft.webmagic.model.*;
import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.PagedPipeline; import us.codecraft.webmagic.pipeline.PagedPipeline;
import us.codecraft.webmagic.scheduler.RedisScheduler;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -71,7 +72,7 @@ public class News163 implements PagedModel {
public static void main(String[] args) { public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class) OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class)
.clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run(); .scheduler(new RedisScheduler("localhost")).clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run();
} }
} }

Loading…
Cancel
Save