From a5a9b141b3e8f91538099cb6c335c20f166363ff Mon Sep 17 00:00:00 2001 From: d0ngw Date: Fri, 27 Dec 2013 11:09:04 +0800 Subject: [PATCH 1/3] The SeleniumDownloader should call the setRawText --- .gitignore | 3 ++- .../webmagic/downloader/selenium/SeleniumDownloader.java | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8e88e25d..c0dc326c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -target/* +target *.iml out/ .idea + diff --git a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java index 3d4fcdb8..50d332ba 100644 --- a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java +++ b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java @@ -84,6 +84,7 @@ public class SeleniumDownloader implements Downloader, Closeable { WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); Page page = new Page(); + page.setRawText(content); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); page.setRequest(request); From a722f9bb66a58bbd9f16d2bc231e2fee2fce6221 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 8 Jan 2014 21:24:58 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=94=B1=E4=BA=8EFileCac?= =?UTF-8?q?heQueueScheduler=E4=B8=ADfileCursor=20=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=86=8D=E6=AC=A1=E6=89=93=E5=BC=80=E6=97=B6=E6=B2=A1=E6=9C=89?= =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E6=8A=9B=E5=87=BANullPointerExcepti?= =?UTF-8?q?on=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../codecraft/webmagic/scheduler/FileCacheQueueScheduler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java index 2ded0bdb..3aa23c75 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java @@ -126,7 +126,7 @@ public class FileCacheQueueScheduler implements Scheduler { private void readCursorFile() throws IOException { BufferedReader fileCursorReader = null; try { - new BufferedReader(new FileReader(getFileName(fileCursor))); + fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor))); String line; //read the last number while ((line = fileCursorReader.readLine()) != null) { From 6cade5ddf324515545d7204a666003fe8bb1b7f3 Mon Sep 17 00:00:00 2001 From: Bitdeli Chef Date: Wed, 22 Jan 2014 07:03:10 +0000 Subject: [PATCH 3/3] Add a Bitdeli badge to README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 5d60572a..0da07399 100644 --- a/README.md +++ b/README.md @@ -127,3 +127,7 @@ To write webmagic, I refered to the projects below : ### Mail-list: [https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java) + + +[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/code4craft/webmagic/trend.png)](https://bitdeli.com/free "Bitdeli Badge") +