diff --git a/pom.xml b/pom.xml index 1828c715..77bc93e8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 7 us.codecraft - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 pom @@ -51,11 +51,11 @@ webmagic-core webmagic-extension/ webmagic-scripts/ - webmagic-avalon - webmagic-lucene - webmagic-samples - webmagic-saxon webmagic-selenium + webmagic-saxon + webmagic-samples + webmagic-admin + webmagic-worker @@ -63,7 +63,7 @@ junit junit - 4.7 + 4.11 test @@ -91,11 +91,6 @@ xsoup 0.2.0 - - net.sf.saxon - Saxon-HE - 9.5.1-1 - com.alibaba fastjson @@ -121,11 +116,6 @@ commons-collections 3.2.1 - - net.sourceforge.htmlcleaner - htmlcleaner - 2.5 - org.apache.commons commons-io diff --git a/webmagic-admin/README.md b/webmagic-admin/README.md new file mode 100644 index 00000000..6e32c068 --- /dev/null +++ b/webmagic-admin/README.md @@ -0,0 +1,3 @@ +WebMagic-Admin +===== +Admin is the control web of workers. \ No newline at end of file diff --git a/webmagic-avalon/pom.xml b/webmagic-admin/pom.xml similarity index 97% rename from webmagic-avalon/pom.xml rename to webmagic-admin/pom.xml index a62bbe9e..58068f93 100644 --- a/webmagic-avalon/pom.xml +++ b/webmagic-admin/pom.xml @@ -3,12 +3,12 @@ webmagic-parent us.codecraft - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 us.codecraft - webmagic-avalon + webmagic-admin war diff --git a/webmagic-avalon/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java b/webmagic-admin/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java similarity index 100% rename from webmagic-avalon/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java rename to webmagic-admin/src/main/java/us/codecraft/webmagic/avalon/web/DashBoardController.java diff --git a/webmagic-avalon/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java b/webmagic-admin/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java similarity index 100% rename from webmagic-avalon/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java rename to webmagic-admin/src/main/java/us/codecraft/webmagic/avalon/web/SpiderController.java diff --git a/webmagic-avalon/src/main/resources/freemarker.properties b/webmagic-admin/src/main/resources/freemarker.properties similarity index 100% rename from webmagic-avalon/src/main/resources/freemarker.properties rename to webmagic-admin/src/main/resources/freemarker.properties diff --git a/webmagic-avalon/src/main/resources/log/log4j.xml b/webmagic-admin/src/main/resources/log/log4j.xml similarity index 100% rename from webmagic-avalon/src/main/resources/log/log4j.xml rename to webmagic-admin/src/main/resources/log/log4j.xml diff --git a/webmagic-avalon/src/main/resources/spring/applicationContext-freemarker.xml b/webmagic-admin/src/main/resources/spring/applicationContext-freemarker.xml similarity index 100% rename from webmagic-avalon/src/main/resources/spring/applicationContext-freemarker.xml rename to webmagic-admin/src/main/resources/spring/applicationContext-freemarker.xml diff --git a/webmagic-avalon/src/main/resources/spring/applicationContext-myBatis.xml b/webmagic-admin/src/main/resources/spring/applicationContext-myBatis.xml similarity index 100% rename from webmagic-avalon/src/main/resources/spring/applicationContext-myBatis.xml rename to webmagic-admin/src/main/resources/spring/applicationContext-myBatis.xml diff --git a/webmagic-avalon/src/main/resources/spring/applicationContext.xml b/webmagic-admin/src/main/resources/spring/applicationContext.xml similarity index 100% rename from webmagic-avalon/src/main/resources/spring/applicationContext.xml rename to webmagic-admin/src/main/resources/spring/applicationContext.xml diff --git a/webmagic-avalon/src/main/webapp/WEB-INF/jsp/404.jsp b/webmagic-admin/src/main/webapp/WEB-INF/jsp/404.jsp similarity index 100% rename from webmagic-avalon/src/main/webapp/WEB-INF/jsp/404.jsp rename to webmagic-admin/src/main/webapp/WEB-INF/jsp/404.jsp diff --git a/webmagic-avalon/src/main/webapp/WEB-INF/jsp/500.jsp b/webmagic-admin/src/main/webapp/WEB-INF/jsp/500.jsp similarity index 100% rename from webmagic-avalon/src/main/webapp/WEB-INF/jsp/500.jsp rename to webmagic-admin/src/main/webapp/WEB-INF/jsp/500.jsp diff --git a/webmagic-avalon/src/main/webapp/WEB-INF/pages/create_spider.ftl b/webmagic-admin/src/main/webapp/WEB-INF/pages/create_spider.ftl similarity index 100% rename from webmagic-avalon/src/main/webapp/WEB-INF/pages/create_spider.ftl rename to webmagic-admin/src/main/webapp/WEB-INF/pages/create_spider.ftl diff --git a/webmagic-avalon/src/main/webapp/WEB-INF/pages/dashboard.ftl b/webmagic-admin/src/main/webapp/WEB-INF/pages/dashboard.ftl similarity index 100% rename from webmagic-avalon/src/main/webapp/WEB-INF/pages/dashboard.ftl rename to webmagic-admin/src/main/webapp/WEB-INF/pages/dashboard.ftl diff --git a/webmagic-avalon/src/main/webapp/WEB-INF/pages/spider_list.ftl b/webmagic-admin/src/main/webapp/WEB-INF/pages/spider_list.ftl similarity index 100% rename from webmagic-avalon/src/main/webapp/WEB-INF/pages/spider_list.ftl rename to webmagic-admin/src/main/webapp/WEB-INF/pages/spider_list.ftl diff --git a/webmagic-avalon/src/main/webapp/WEB-INF/web.xml b/webmagic-admin/src/main/webapp/WEB-INF/web.xml similarity index 100% rename from webmagic-avalon/src/main/webapp/WEB-INF/web.xml rename to webmagic-admin/src/main/webapp/WEB-INF/web.xml diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-cerulean.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-cerulean.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-cerulean.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-cerulean.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-classic.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-classic.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-classic.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-classic.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-classic.min.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-classic.min.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-classic.min.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-classic.min.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-cyborg.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-cyborg.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-cyborg.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-cyborg.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-journal.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-journal.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-journal.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-journal.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-redy.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-redy.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-redy.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-redy.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-responsive.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-responsive.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-responsive.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-responsive.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-responsive.min.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-responsive.min.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-responsive.min.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-responsive.min.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-simplex.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-simplex.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-simplex.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-simplex.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-slate.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-slate.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-slate.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-slate.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-spacelab.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-spacelab.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-spacelab.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-spacelab.css diff --git a/webmagic-avalon/src/main/webapp/static/css/bootstrap-united.css b/webmagic-admin/src/main/webapp/static/css/bootstrap-united.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/bootstrap-united.css rename to webmagic-admin/src/main/webapp/static/css/bootstrap-united.css diff --git a/webmagic-avalon/src/main/webapp/static/css/charisma-app.css b/webmagic-admin/src/main/webapp/static/css/charisma-app.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/charisma-app.css rename to webmagic-admin/src/main/webapp/static/css/charisma-app.css diff --git a/webmagic-avalon/src/main/webapp/static/css/chosen.css b/webmagic-admin/src/main/webapp/static/css/chosen.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/chosen.css rename to webmagic-admin/src/main/webapp/static/css/chosen.css diff --git a/webmagic-avalon/src/main/webapp/static/css/colorbox.css b/webmagic-admin/src/main/webapp/static/css/colorbox.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/colorbox.css rename to webmagic-admin/src/main/webapp/static/css/colorbox.css diff --git a/webmagic-avalon/src/main/webapp/static/css/elfinder.min.css b/webmagic-admin/src/main/webapp/static/css/elfinder.min.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/elfinder.min.css rename to webmagic-admin/src/main/webapp/static/css/elfinder.min.css diff --git a/webmagic-avalon/src/main/webapp/static/css/elfinder.theme.css b/webmagic-admin/src/main/webapp/static/css/elfinder.theme.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/elfinder.theme.css rename to webmagic-admin/src/main/webapp/static/css/elfinder.theme.css diff --git a/webmagic-avalon/src/main/webapp/static/css/fullcalendar.css b/webmagic-admin/src/main/webapp/static/css/fullcalendar.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/fullcalendar.css rename to webmagic-admin/src/main/webapp/static/css/fullcalendar.css diff --git a/webmagic-avalon/src/main/webapp/static/css/fullcalendar.print.css b/webmagic-admin/src/main/webapp/static/css/fullcalendar.print.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/fullcalendar.print.css rename to webmagic-admin/src/main/webapp/static/css/fullcalendar.print.css diff --git a/webmagic-avalon/src/main/webapp/static/css/jquery-ui-1.8.21.custom.css b/webmagic-admin/src/main/webapp/static/css/jquery-ui-1.8.21.custom.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/jquery-ui-1.8.21.custom.css rename to webmagic-admin/src/main/webapp/static/css/jquery-ui-1.8.21.custom.css diff --git a/webmagic-avalon/src/main/webapp/static/css/jquery.cleditor.css b/webmagic-admin/src/main/webapp/static/css/jquery.cleditor.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/jquery.cleditor.css rename to webmagic-admin/src/main/webapp/static/css/jquery.cleditor.css diff --git a/webmagic-avalon/src/main/webapp/static/css/jquery.iphone.toggle.css b/webmagic-admin/src/main/webapp/static/css/jquery.iphone.toggle.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/jquery.iphone.toggle.css rename to webmagic-admin/src/main/webapp/static/css/jquery.iphone.toggle.css diff --git a/webmagic-avalon/src/main/webapp/static/css/jquery.noty.css b/webmagic-admin/src/main/webapp/static/css/jquery.noty.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/jquery.noty.css rename to webmagic-admin/src/main/webapp/static/css/jquery.noty.css diff --git a/webmagic-avalon/src/main/webapp/static/css/noty_theme_default.css b/webmagic-admin/src/main/webapp/static/css/noty_theme_default.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/noty_theme_default.css rename to webmagic-admin/src/main/webapp/static/css/noty_theme_default.css diff --git a/webmagic-avalon/src/main/webapp/static/css/opa-icons.css b/webmagic-admin/src/main/webapp/static/css/opa-icons.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/opa-icons.css rename to webmagic-admin/src/main/webapp/static/css/opa-icons.css diff --git a/webmagic-avalon/src/main/webapp/static/css/uniform.default.css b/webmagic-admin/src/main/webapp/static/css/uniform.default.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/uniform.default.css rename to webmagic-admin/src/main/webapp/static/css/uniform.default.css diff --git a/webmagic-avalon/src/main/webapp/static/css/uploadify.css b/webmagic-admin/src/main/webapp/static/css/uploadify.css similarity index 100% rename from webmagic-avalon/src/main/webapp/static/css/uploadify.css rename to webmagic-admin/src/main/webapp/static/css/uploadify.css diff --git a/webmagic-avalon/src/main/webapp/static/favicon.jpg b/webmagic-admin/src/main/webapp/static/favicon.jpg similarity index 100% rename from webmagic-avalon/src/main/webapp/static/favicon.jpg rename to webmagic-admin/src/main/webapp/static/favicon.jpg diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-alert.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-alert.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-alert.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-alert.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-button.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-button.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-button.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-button.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-carousel.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-carousel.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-carousel.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-carousel.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-collapse.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-collapse.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-collapse.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-collapse.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-dropdown.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-dropdown.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-dropdown.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-dropdown.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-modal.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-modal.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-modal.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-modal.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-popover.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-popover.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-popover.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-popover.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-scrollspy.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-scrollspy.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-scrollspy.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-scrollspy.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-tab.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-tab.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-tab.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-tab.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-toggle.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-toggle.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-toggle.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-toggle.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-tooltip.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-tooltip.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-tooltip.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-tooltip.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-tour.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-tour.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-tour.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-tour.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-transition.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-transition.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-transition.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-transition.js diff --git a/webmagic-avalon/src/main/webapp/static/js/bootstrap-typeahead.js b/webmagic-admin/src/main/webapp/static/js/bootstrap-typeahead.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/bootstrap-typeahead.js rename to webmagic-admin/src/main/webapp/static/js/bootstrap-typeahead.js diff --git a/webmagic-avalon/src/main/webapp/static/js/charisma.js b/webmagic-admin/src/main/webapp/static/js/charisma.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/charisma.js rename to webmagic-admin/src/main/webapp/static/js/charisma.js diff --git a/webmagic-avalon/src/main/webapp/static/js/excanvas.js b/webmagic-admin/src/main/webapp/static/js/excanvas.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/excanvas.js rename to webmagic-admin/src/main/webapp/static/js/excanvas.js diff --git a/webmagic-avalon/src/main/webapp/static/js/fullcalendar.min.js b/webmagic-admin/src/main/webapp/static/js/fullcalendar.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/fullcalendar.min.js rename to webmagic-admin/src/main/webapp/static/js/fullcalendar.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery-1.7.2.min.js b/webmagic-admin/src/main/webapp/static/js/jquery-1.7.2.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery-1.7.2.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery-1.7.2.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery-ui-1.8.21.custom.min.js b/webmagic-admin/src/main/webapp/static/js/jquery-ui-1.8.21.custom.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery-ui-1.8.21.custom.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery-ui-1.8.21.custom.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.autogrow-textarea.js b/webmagic-admin/src/main/webapp/static/js/jquery.autogrow-textarea.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.autogrow-textarea.js rename to webmagic-admin/src/main/webapp/static/js/jquery.autogrow-textarea.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.chosen.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.chosen.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.chosen.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.chosen.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.cleditor.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.cleditor.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.cleditor.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.cleditor.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.colorbox.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.colorbox.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.colorbox.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.colorbox.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.cookie.js b/webmagic-admin/src/main/webapp/static/js/jquery.cookie.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.cookie.js rename to webmagic-admin/src/main/webapp/static/js/jquery.cookie.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.dataTables.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.dataTables.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.dataTables.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.dataTables.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.elfinder.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.elfinder.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.elfinder.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.elfinder.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.flot.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.flot.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.flot.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.flot.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.flot.pie.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.flot.pie.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.flot.pie.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.flot.pie.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.flot.resize.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.flot.resize.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.flot.resize.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.flot.resize.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.flot.stack.js b/webmagic-admin/src/main/webapp/static/js/jquery.flot.stack.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.flot.stack.js rename to webmagic-admin/src/main/webapp/static/js/jquery.flot.stack.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.history.js b/webmagic-admin/src/main/webapp/static/js/jquery.history.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.history.js rename to webmagic-admin/src/main/webapp/static/js/jquery.history.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.iphone.toggle.js b/webmagic-admin/src/main/webapp/static/js/jquery.iphone.toggle.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.iphone.toggle.js rename to webmagic-admin/src/main/webapp/static/js/jquery.iphone.toggle.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.js b/webmagic-admin/src/main/webapp/static/js/jquery.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.js rename to webmagic-admin/src/main/webapp/static/js/jquery.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.noty.js b/webmagic-admin/src/main/webapp/static/js/jquery.noty.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.noty.js rename to webmagic-admin/src/main/webapp/static/js/jquery.noty.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.raty.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.raty.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.raty.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.raty.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.uniform.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.uniform.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.uniform.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.uniform.min.js diff --git a/webmagic-avalon/src/main/webapp/static/js/jquery.uploadify-3.1.min.js b/webmagic-admin/src/main/webapp/static/js/jquery.uploadify-3.1.min.js similarity index 100% rename from webmagic-avalon/src/main/webapp/static/js/jquery.uploadify-3.1.min.js rename to webmagic-admin/src/main/webapp/static/js/jquery.uploadify-3.1.min.js diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml index 43a6743f..4bea6e2d 100644 --- a/webmagic-core/pom.xml +++ b/webmagic-core/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 @@ -50,11 +50,6 @@ commons-collections - - net.sourceforge.htmlcleaner - htmlcleaner - - org.assertj assertj-core diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index b6f95ac6..0d52ac1f 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -13,7 +13,6 @@ import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline; import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.scheduler.QueueScheduler; import us.codecraft.webmagic.scheduler.Scheduler; -import us.codecraft.webmagic.utils.EnvironmentUtil; import us.codecraft.webmagic.utils.ThreadUtils; import us.codecraft.webmagic.utils.UrlUtils; @@ -541,15 +540,6 @@ public class Spider implements Runnable, Task { return this; } - /** - * switch off xsoup - * - * @return - */ - public static void xsoupOff() { - EnvironmentUtil.setUseXsoup(false); - } - public boolean isExitWhenComplete() { return exitWhenComplete; } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java index 3f5df764..3db0ff13 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java @@ -4,7 +4,6 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import us.codecraft.webmagic.utils.EnvironmentUtil; import java.util.ArrayList; import java.util.List; @@ -96,16 +95,11 @@ public class Html extends PlainText { @Override public Selectable xpath(String xpath) { - if (EnvironmentUtil.useXsoup()) { - XsoupSelector xsoupSelector = new XsoupSelector(xpath); - if (document != null) { - return new Html(xsoupSelector.selectList(document)); - } - return selectList(xsoupSelector, strings); - } else { - XpathSelector xpathSelector = new XpathSelector(xpath); - return selectList(xpathSelector, strings); + XpathSelector xpathSelector = new XpathSelector(xpath); + if (document != null) { + return new Html(xpathSelector.selectList(document)); } + return selectList(xpathSelector, strings); } @Override diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectors.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectors.java index 0c34eadb..6cac9640 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectors.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectors.java @@ -32,8 +32,12 @@ public abstract class Selectors { return new XpathSelector(expr); } - public static XsoupSelector xsoup(String expr) { - return new XsoupSelector(expr); + /** + * @Deprecated + * @see #xpath(String) + */ + public static XpathSelector xsoup(String expr) { + return new XpathSelector(expr); } public static AndSelector and(Selector... selectors) { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java index c0e428cb..d1bbcae9 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java @@ -1,70 +1,32 @@ package us.codecraft.webmagic.selector; -import org.htmlcleaner.*; +import org.jsoup.nodes.Element; +import us.codecraft.xsoup.XPathEvaluator; +import us.codecraft.xsoup.Xsoup; -import java.util.ArrayList; import java.util.List; /** - * XPath selector based on HtmlCleaner.
+ * XPath selector based on Xsoup.
* * @author code4crafter@gmail.com
- * @since 0.1.0 + * @since 0.3.0 */ -public class XpathSelector implements Selector { +public class XpathSelector extends BaseElementSelector { - private String xpathStr; + private XPathEvaluator xPathEvaluator; public XpathSelector(String xpathStr) { - this.xpathStr = xpathStr; + this.xPathEvaluator = Xsoup.compile(xpathStr); } @Override - public String select(String text) { - HtmlCleaner htmlCleaner = new HtmlCleaner(); - TagNode tagNode = htmlCleaner.clean(text); - if (tagNode == null) { - return null; - } - try { - Object[] objects = tagNode.evaluateXPath(xpathStr); - if (objects != null && objects.length >= 1) { - if (objects[0] instanceof TagNode) { - TagNode tagNode1 = (TagNode) objects[0]; - return htmlCleaner.getInnerHtml(tagNode1); - } else { - return objects[0].toString(); - } - } - } catch (XPatherException e) { - e.printStackTrace(); - } - return null; + public String select(Element element) { + return xPathEvaluator.evaluate(element).get(); } @Override - public List selectList(String text) { - HtmlCleaner htmlCleaner = new HtmlCleaner(); - TagNode tagNode = htmlCleaner.clean(text); - if (tagNode == null) { - return null; - } - List results = new ArrayList(); - try { - Object[] objects = tagNode.evaluateXPath(xpathStr); - if (objects != null && objects.length >= 1) { - for (Object object : objects) { - if (object instanceof TagNode) { - TagNode tagNode1 = (TagNode) object; - results.add(htmlCleaner.getInnerHtml(tagNode1)); - } else { - results.add(object.toString()); - } - } - } - } catch (XPatherException e) { - e.printStackTrace(); - } - return results; + public List selectList(Element element) { + return xPathEvaluator.evaluate(element).list(); } } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XsoupSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XsoupSelector.java deleted file mode 100644 index ea46290a..00000000 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XsoupSelector.java +++ /dev/null @@ -1,32 +0,0 @@ -package us.codecraft.webmagic.selector; - -import org.jsoup.nodes.Element; -import us.codecraft.xsoup.XPathEvaluator; -import us.codecraft.xsoup.Xsoup; - -import java.util.List; - -/** - * XPath selector based on Xsoup.
- * - * @author code4crafter@gmail.com
- * @since 0.3.0 - */ -public class XsoupSelector extends BaseElementSelector { - - private XPathEvaluator xPathEvaluator; - - public XsoupSelector(String xpathStr) { - this.xPathEvaluator = Xsoup.compile(xpathStr); - } - - @Override - public String select(Element element) { - return xPathEvaluator.evaluate(element).get(); - } - - @Override - public List selectList(Element element) { - return xPathEvaluator.evaluate(element).list(); - } -} diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/EnvironmentUtil.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/EnvironmentUtil.java deleted file mode 100644 index 7aa5c13e..00000000 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/EnvironmentUtil.java +++ /dev/null @@ -1,28 +0,0 @@ -package us.codecraft.webmagic.utils; - -import org.apache.commons.lang3.BooleanUtils; - -import java.util.Properties; - -/** - * @author code4crafter@gmail.com - * @since 0.3.0 - */ -public abstract class EnvironmentUtil { - - private static final String USE_XSOUP = "xsoup"; - - public static boolean useXsoup() { - Properties properties = System.getProperties(); - Object o = properties.get(USE_XSOUP); - if (o == null) { - return true; - } - return BooleanUtils.toBoolean(((String) o).toLowerCase()); - } - - public static void setUseXsoup(boolean useXsoup) { - Properties properties = System.getProperties(); - properties.setProperty(USE_XSOUP, BooleanUtils.toString(useXsoup, "true", "false")); - } -} diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/ExtractorsTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/ExtractorsTest.java index b3980072..e8da48d7 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/ExtractorsTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/ExtractorsTest.java @@ -29,6 +29,6 @@ public class ExtractorsTest { Assert.assertEquals("bb", and($("title"), regex("aa(bb)cc")).select(html2)); OrSelector or = or($("div h1 a", "innerHtml"), xpath("//title")); Assert.assertEquals("aabbcc", or.select(html)); - Assert.assertEquals("aabbcc", or.select(html2)); + Assert.assertEquals("aabbcc", or.select(html2)); } } diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/EnvironmentUtilTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/EnvironmentUtilTest.java deleted file mode 100644 index cb620e7a..00000000 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/EnvironmentUtilTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package us.codecraft.webmagic.utils; - -import org.junit.Test; - -import static junit.framework.Assert.*; - -/** - * @author code4crafter@gmail.com - */ -public class EnvironmentUtilTest { - - @Test - public void test() { - assertTrue(EnvironmentUtil.useXsoup()); - EnvironmentUtil.setUseXsoup(false); - assertFalse(EnvironmentUtil.useXsoup()); - } -} diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml index ad22edd4..cd8c12f4 100644 --- a/webmagic-extension/pom.xml +++ b/webmagic-extension/pom.xml @@ -3,7 +3,7 @@ us.codecraft webmagic-parent - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java index 0818fde2..54a4439d 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/ExtractorUtils.java @@ -37,12 +37,7 @@ public class ExtractorUtils { } private static Selector getXpathSelector(String value) { - Selector selector; - if (EnvironmentUtil.useXsoup()) { - selector = new XsoupSelector(value); - } else { - selector = new XpathSelector(value); - } + Selector selector = new XpathSelector(value); return selector; } diff --git a/webmagic-lucene/README.md b/webmagic-lucene/README.md deleted file mode 100644 index 77050ab0..00000000 --- a/webmagic-lucene/README.md +++ /dev/null @@ -1,3 +0,0 @@ -webmagic-lucene --------- -尝试将webmagic与lucene结合,打造一个搜索引擎。开发中,不作为webmagic主要模块。 \ No newline at end of file diff --git a/webmagic-lucene/pom.xml b/webmagic-lucene/pom.xml deleted file mode 100644 index f8de71bc..00000000 --- a/webmagic-lucene/pom.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - webmagic-parent - us.codecraft - 0.4.4-SNAPSHOT - - 4.0.0 - - webmagic-lucene - - - - org.apache.lucene - lucene-analyzers-common - 4.4.0 - - - org.apache.lucene - lucene-queryparser - 4.4.0 - - - us.codecraft - webmagic-extension - ${project.version} - - - junit - junit - - - - - - - maven-deploy-plugin - - true - - - - - - - \ No newline at end of file diff --git a/webmagic-lucene/src/main/java/us/codecraft/webmagic/pipeline/LucenePipeline.java b/webmagic-lucene/src/main/java/us/codecraft/webmagic/pipeline/LucenePipeline.java deleted file mode 100644 index 6fe27021..00000000 --- a/webmagic-lucene/src/main/java/us/codecraft/webmagic/pipeline/LucenePipeline.java +++ /dev/null @@ -1,92 +0,0 @@ -package us.codecraft.webmagic.pipeline; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Version; -import us.codecraft.webmagic.ResultItems; -import us.codecraft.webmagic.Task; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * @author code4crafter@gmail.com
- * Date: 13-8-5
- * Time: 下午2:11
- */ -public class LucenePipeline implements Pipeline { - - private Directory directory; - - private Analyzer analyzer; - - private IndexWriterConfig config; - - private void init() throws IOException { - analyzer = new StandardAnalyzer(Version.LUCENE_44); - directory = new RAMDirectory(); - config = new IndexWriterConfig(Version.LUCENE_44, analyzer); - } - - public LucenePipeline() { - try { - init(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - public List search(String fieldName, String value) throws IOException, ParseException { - List documents = new ArrayList(); - DirectoryReader ireader = DirectoryReader.open(directory); - IndexSearcher isearcher = new IndexSearcher(ireader); - // Parse a simple query that searches for "text": - QueryParser parser = new QueryParser(Version.LUCENE_44, fieldName, analyzer); - Query query = parser.parse(value); - ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; - // Iterate through the results: - for (int i = 0; i < hits.length; i++) { - Document hitDoc = isearcher.doc(hits[i].doc); - documents.add(hitDoc); - } - ireader.close(); - return documents; - } - - @Override - public void process(ResultItems resultItems, Task task) { - if (resultItems.isSkip()){ - return; - } - Document doc = new Document(); - Map all = resultItems.getAll(); - if (all==null){ - return; - } - for (Map.Entry objectEntry : all.entrySet()) { - doc.add(new Field(objectEntry.getKey(), objectEntry.getValue().toString(), TextField.TYPE_STORED)); - } - try { - IndexWriter indexWriter = new IndexWriter(directory, config); - indexWriter.addDocument(doc); - indexWriter.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/webmagic-lucene/src/main/test/java/us/codecraft/webmagic/lucene/OschinaBlog.java b/webmagic-lucene/src/main/test/java/us/codecraft/webmagic/lucene/OschinaBlog.java deleted file mode 100644 index b3503702..00000000 --- a/webmagic-lucene/src/main/test/java/us/codecraft/webmagic/lucene/OschinaBlog.java +++ /dev/null @@ -1,61 +0,0 @@ -package us.codecraft.webmagic.lucene; - -import org.apache.lucene.document.Document; -import org.apache.lucene.queryparser.classic.ParseException; -import us.codecraft.webmagic.Site; -import us.codecraft.webmagic.model.annotation.ExtractBy; -import us.codecraft.webmagic.model.OOSpider; -import us.codecraft.webmagic.model.annotation.TargetUrl; -import us.codecraft.webmagic.pipeline.LucenePipeline; - -import java.io.IOException; -import java.util.List; - -/** - * @author code4crafter@gmail.com
- * Date: 13-8-2
- * Time: 上午7:52
- */ -@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+") -public class OschinaBlog { - - @ExtractBy("//title") - private String title; - - @ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css) - private String content; - - @Override - public String toString() { - return "OschinaBlog{" + - "title='" + title + '\'' + - ", content='" + content + '\'' + - '}'; - } - - public static void main(String[] args) { - LucenePipeline pipeline = new LucenePipeline(); - OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(pipeline).runAsync(); - while (true) { - try { - List search = pipeline.search("title", "webmagic"); - System.out.println(search); - Thread.sleep(3000); - } catch (IOException e) { - e.printStackTrace(); - } catch (ParseException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - } - - public String getTitle() { - return title; - } - - public String getContent() { - return content; - } -} diff --git a/webmagic-panel/README.md b/webmagic-panel/README.md deleted file mode 100644 index 30ddd132..00000000 --- a/webmagic-panel/README.md +++ /dev/null @@ -1,20 +0,0 @@ -Worker: - -任务执行者,提供Http接口,监控运行状态,终止和开始job - -队列: - -仍然使用redis - -Panel: - -提供Web管理后台,管理 - - - -1. 新建任务 - 1. 通过脚本 - 2. 配置 - 3. 分配机器 -2. 已有任务 -3. 任务查看 \ No newline at end of file diff --git a/webmagic-panel/pom.xml b/webmagic-panel/pom.xml deleted file mode 100644 index 288e8df6..00000000 --- a/webmagic-panel/pom.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - webmagic-parent - us.codecraft - 0.4.3-SNAPSHOT - - 4.0.0 - - us.codecraft - webmagic-panel - - - - us.codecraft - webmagic-scripts - ${project.version} - - - - - - - maven-deploy-plugin - - true - - - - - - - \ No newline at end of file diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml index f13b7ead..3868ddad 100644 --- a/webmagic-samples/pom.xml +++ b/webmagic-samples/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml index 1632b453..f63c21f4 100644 --- a/webmagic-saxon/pom.xml +++ b/webmagic-saxon/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 @@ -15,9 +15,15 @@ webmagic-core ${project.version}
+ + net.sourceforge.htmlcleaner + htmlcleaner + 2.5 + net.sf.saxon Saxon-HE + 9.5.1-1 junit diff --git a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java index 895ec4bf..728bd690 100644 --- a/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java +++ b/webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java @@ -1350,7 +1350,7 @@ public class XpathSelectorTest { + "\n" + "\n" + " \n" + " \n" + " \n" + "\n"; String text2 = "
aaa
"; XpathSelector xpathSelector = new XpathSelector( - "//div[@id='main']/div[@class='blog_main']/div[1][@class='blog_title']/h3/a"); + "//div[@id='main']/div[@class='blog_main']/div[@class='blog_title']/h3/a/text()"); String select = xpathSelector.select(text); Assert.assertEquals("jsoup 解析页面商品信息", select); } diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml index b9c6e54b..5c211609 100644 --- a/webmagic-scripts/pom.xml +++ b/webmagic-scripts/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index 0c7cfc75..01577ce9 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -3,7 +3,7 @@ webmagic-parent us.codecraft - 0.4.4-SNAPSHOT + 0.5.0-SNAPSHOT 4.0.0 diff --git a/webmagic-worker/README.md b/webmagic-worker/README.md new file mode 100644 index 00000000..334ab0ee --- /dev/null +++ b/webmagic-worker/README.md @@ -0,0 +1,3 @@ +WebMagic-Worker +===== +Worker is the spider container. \ No newline at end of file diff --git a/webmagic-worker/pom.xml b/webmagic-worker/pom.xml new file mode 100644 index 00000000..cf10ab56 --- /dev/null +++ b/webmagic-worker/pom.xml @@ -0,0 +1,118 @@ + + + + webmagic-parent + us.codecraft + 0.5.0-SNAPSHOT + + 4.0.0 + + webmagic-worker + war + + + + us.codecraft + webmagic-scripts + ${project.version} + + + + org.mybatis + mybatis + 3.1.1 + + + + org.mybatis + mybatis-spring + 1.1.1 + + + + org.freemarker + freemarker + 2.3.19 + + + org.springframework + spring-test + ${spring-version} + test + + + + org.springframework + spring-aop + ${spring-version} + + + + org.aspectj + aspectjrt + 1.7.2 + + + org.aspectj + aspectjweaver + 1.7.2 + + + org.springframework + spring-core + ${spring-version} + + + org.springframework + spring-webmvc + ${spring-version} + + + + javax.servlet + javax.servlet-api + 3.0.1 + + + org.springframework + spring-context + ${spring-version} + + + org.springframework + spring-context-support + ${spring-version} + + + com.alibaba + fastjson + 1.1.37 + + + + + + + maven-deploy-plugin + + true + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + true + ./lib/ + us.codecraft.webmagic.main.QuickStarter + + + + + + + + diff --git a/webmagic-worker/src/main/resources/log4j.xml b/webmagic-worker/src/main/resources/log4j.xml new file mode 100644 index 00000000..a6630f81 --- /dev/null +++ b/webmagic-worker/src/main/resources/log4j.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +