From 4eb3d6008352658e19587f04bf3aae06dbd1e85f Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Mon, 5 Aug 2013 22:06:39 +0800 Subject: [PATCH] fix nullpointer exception --- .../webmagic/model/ModelPageProcessor.java | 4 ++-- .../webmagic/scheduler/RedisScheduler.java | 18 ++++++++++-------- .../webmagic/model/samples/News163.java | 3 ++- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java index 12e85d07..84563ce9 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java @@ -51,14 +51,14 @@ class ModelPageProcessor implements PageProcessor { @Override public void process(Page page) { for (PageModelExtractor pageModelExtractor : pageModelExtractorList) { + extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns()); + extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns()); Object process = pageModelExtractor.process(page); if (process == null || (process instanceof List && ((List) process).size() == 0)) { page.getResultItems().setSkip(true); } postProcessPageModel(pageModelExtractor.getClazz(), process); page.putField(pageModelExtractor.getClazz().getCanonicalName(), process); - extractLinks(page, pageModelExtractor.getHelpUrlRegionSelector(), pageModelExtractor.getHelpUrlPatterns()); - extractLinks(page, pageModelExtractor.getTargetUrlRegionSelector(), pageModelExtractor.getTargetUrlPatterns()); } } diff --git a/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java index c00c12f2..fb82a69e 100644 --- a/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java +++ b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java @@ -56,18 +56,20 @@ public class RedisScheduler implements Scheduler { public synchronized Request poll(Task task) { Jedis jedis = pool.getResource(); String url = jedis.lpop(QUEUE_PREFIX + task.getUUID()); + if (url == null) { + return null; + } String key = ITEM_PREFIX + DigestUtils.shaHex(url); byte[] bytes = jedis.get(key.getBytes()); - try { - Object o = HessianSerializer.INSTANCE.deSerialize(bytes); - return (Request)o; - } catch (Exception e) { - e.printStackTrace(); + if (bytes!=null){ + try { + Object o = HessianSerializer.INSTANCE.deSerialize(bytes); + return (Request)o; + } catch (Exception e) { + e.printStackTrace(); + } } pool.returnResource(jedis); - if (url == null) { - return null; - } return new Request(url); } } diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/News163.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/News163.java index 52abe887..2aa90733 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/News163.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/News163.java @@ -5,6 +5,7 @@ import us.codecraft.webmagic.Site; import us.codecraft.webmagic.model.*; import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.PagedPipeline; +import us.codecraft.webmagic.scheduler.RedisScheduler; import java.util.Collection; import java.util.List; @@ -71,7 +72,7 @@ public class News163 implements PagedModel { public static void main(String[] args) { OOSpider.create(Site.me().addStartUrl("http://news.163.com/13/0802/05/958I1E330001124J_2.html"), News163.class) - .clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run(); + .scheduler(new RedisScheduler("localhost")).clearPipeline().pipeline(new PagedPipeline()).pipeline(new ConsolePipeline()).run(); } }