Patch summary (free text placed ahead of the first "diff --git" header).

What this patch does:
  * schema.sql: terminates the DynamicClass table definition and adds the
    Spider and PageProcessor tables.
  * Deletes DashBoardController.
  * Worker: adds a one-line class description to the Javadoc.
  * SpiderController: moves to package us.codecraft.webmagic.worker.controller,
    injects a Worker via @Autowired, and makes create() take an "id" request
    parameter.
  * Html: replaces the "init" flag with "needInitCache", adds constructors
    that accept the flag, and builds every selection result with
    needInitCache=false so initDocument() does not parse a Document for it.
    xpath() now obtains its selector through Selectors.xpath().
  * Adds SelectorTest.testChain.

Review notes:
  * Spider table fix: only Id keeps AUTO_INCREMENT. MySQL accepts a single
    auto column per table and it must be a key; the previous definition
    declared four. PageProcessorId, PipelineId and SchedulerId are plain
    unsigned integer columns.
  * Spider table fix: the UNIQUE KEY on ClassName was dropped because Spider
    has no ClassName column. The hunk header is +6,25 accordingly.
  * The post-image blob id on the schema.sql "index" line predates the two
    fixes above and no longer matches the resulting file.
  * Leading whitespace was reconstructed (2 spaces for SQL, 4 for Java).
    Confirm it matches the target tree, or apply with whitespace tolerance.
  * Generic type arguments and the HTML fixture string in SelectorTest look
    stripped in this copy of the patch. TODO: restore from the original
    commit before applying.

diff --git a/webmagic-avalon/webmagic-avalon-common/src/main/resources/sql/mysql/schema.sql b/webmagic-avalon/webmagic-avalon-common/src/main/resources/sql/mysql/schema.sql
index 6c361b18..c75a884c 100644
--- a/webmagic-avalon/webmagic-avalon-common/src/main/resources/sql/mysql/schema.sql
+++ b/webmagic-avalon/webmagic-avalon-common/src/main/resources/sql/mysql/schema.sql
@@ -6,4 +6,25 @@ CREATE TABLE `DynamicClass` (
   `UpdateTime` datetime NOT NULL,
   PRIMARY KEY (`Id`),
   UNIQUE KEY `un_class_name` (`ClassName`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+CREATE TABLE `Spider` (
+  `Id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `PageProcessorId` int(11) unsigned NOT NULL,
+  `PipelineId` int(11) unsigned NOT NULL,
+  `SchedulerId` int(11) unsigned NOT NULL,
+  `Config` text NOT NULL,
+  `AddTime` datetime NOT NULL,
+  `UpdateTime` datetime NOT NULL,
+  PRIMARY KEY (`Id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+CREATE TABLE `PageProcessor` (
+  `Id` int(11) unsigned NOT NULL AUTO_INCREMENT,
+  `ClassName` varchar(200) NOT NULL,
+  `Params` text NOT NULL,
+  `AddTime` datetime NOT NULL,
+  `UpdateTime` datetime NOT NULL,
+  PRIMARY KEY (`Id`),
+  UNIQUE KEY `un_class_name` (`ClassName`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
\ No newline at end of file
diff --git a/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java b/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java
deleted file mode 100644
index 3ef2a867..00000000
--- a/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package us.codecraft.webmagic.avalon.web;
-
-import org.springframework.stereotype.Controller;
-import org.springframework.web.bind.annotation.RequestMapping;
-import org.springframework.web.servlet.ModelAndView;
-
-/**
- * @author code4crafter@gmail.com
- */
-@Controller("dashboard")
-@RequestMapping("/")
-public class DashBoardController {
-
-    @RequestMapping
-    public ModelAndView index() {
-        ModelAndView map = new ModelAndView("dashboard");
-        return map;
-    }
-
-}
diff --git a/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Worker.java b/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Worker.java
index 91867cc9..a65c94b1 100644
--- a/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Worker.java
+++ b/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/Worker.java
@@ -8,6 +8,8 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutorService;
 
 /**
+ * Container of Spiders.
+ *
  * @author code4crafter@gmail.com
  */
 public class Worker {
diff --git a/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java b/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/controller/SpiderController.java
similarity index 59%
rename from webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java
rename to webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/controller/SpiderController.java
index 2f185696..d33b0daa 100644
--- a/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java
+++ b/webmagic-avalon/webmagic-worker/src/main/java/us/codecraft/webmagic/worker/controller/SpiderController.java
@@ -1,8 +1,11 @@
-package us.codecraft.webmagic.avalon.web;
+package us.codecraft.webmagic.worker.controller;
 
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Controller;
 import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
 import org.springframework.web.bind.annotation.ResponseBody;
+import us.codecraft.webmagic.worker.Worker;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -10,15 +13,19 @@ import java.util.Map;
 /**
  * @author code4crafter@gmail.com
  */
-@Controller("spider")
+@Controller
 @RequestMapping("spider")
 public class SpiderController {
 
+    @Autowired
+    private Worker worker;
+
     @RequestMapping("create")
     @ResponseBody
-    public Map create() {
+    public Map create(@RequestParam("id") String id) {
         HashMap map = new HashMap();
         map.put("code", 200);
         return map;
     }
+
 }
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
index 3db0ff13..614b111f 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
@@ -23,7 +23,7 @@ public class Html extends PlainText {
      */
     private Document document;
 
-    private boolean init = false;
+    private boolean needInitCache = true;
 
     public Html(List strings) {
         super(strings);
@@ -33,12 +33,22 @@ public class Html extends PlainText {
         super(text);
     }
 
+    public Html(List strings, boolean needInitCache) {
+        super(strings);
+        this.needInitCache = needInitCache;
+    }
+
+    public Html(String text, boolean needInitCache) {
+        super(text);
+        this.needInitCache = needInitCache;
+    }
+
     /**
      * lazy init
      */
     private void initDocument() {
-        if (this.document == null && !init) {
-            init = true;
+        if (this.document == null && needInitCache) {
+            needInitCache = false;
             //just init once whether the parsing succeeds or not
             try {
                 this.document = Jsoup.parse(getText());
@@ -67,7 +77,7 @@ public class Html extends PlainText {
                 results.add(result);
             }
         }
-        return new Html(results);
+        return new Html(results, false);
     }
 
     @Override
@@ -78,7 +88,7 @@ public class Html extends PlainText {
             List result = selector.selectList(string);
             results.addAll(result);
         }
-        return new Html(results);
+        return new Html(results, false);
     }
 
     @Override
@@ -95,9 +105,9 @@ public class Html extends PlainText {
 
     @Override
     public Selectable xpath(String xpath) {
-        XpathSelector xpathSelector = new XpathSelector(xpath);
+        XpathSelector xpathSelector = Selectors.xpath(xpath);
         if (document != null) {
-            return new Html(xpathSelector.selectList(document));
+            return new Html(xpathSelector.selectList(document), false);
         }
         return selectList(xpathSelector, strings);
     }
@@ -106,7 +116,7 @@ public class Html extends PlainText {
     public Selectable $(String selector) {
         CssSelector cssSelector = Selectors.$(selector);
         if (document != null) {
-            return new Html(cssSelector.selectList(document));
+            return new Html(cssSelector.selectList(document), false);
         }
         return selectList(cssSelector, strings);
     }
@@ -115,7 +125,7 @@ public class Html extends PlainText {
     public Selectable $(String selector, String attrName) {
         CssSelector cssSelector = Selectors.$(selector, attrName);
         if (document != null) {
-            return new Html(cssSelector.selectList(document));
+            return new Html(cssSelector.selectList(document), false);
         }
         return selectList(cssSelector, strings);
     }
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java
new file mode 100644
index 00000000..249a8373
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/SelectorTest.java
@@ -0,0 +1,26 @@
+package us.codecraft.webmagic.selector;
+
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * @author code4crafter@gmail.com
+ */
+public class SelectorTest {
+
+    private String html = "";
+
+    @Test
+    public void testChain() throws Exception {
+        Html selectable = new Html(html);
+        List linksWithoutChain = selectable.links().all();
+        Selectable xpath = selectable.xpath("//div");
+        List linksWithChainFirstCall = xpath.links().all();
+        List linksWithChainSecondCall = xpath.links().all();
+        assertThat(linksWithoutChain).hasSameSizeAs(linksWithChainFirstCall);
+        assertThat(linksWithChainFirstCall).hasSameSizeAs(linksWithChainSecondCall);
+    }
+}