diff --git a/.gitignore b/.gitignore index 8e88e25d..c0dc326c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -target/* +target *.iml out/ .idea + diff --git a/README.md b/README.md index 5d60572a..0da07399 100644 --- a/README.md +++ b/README.md @@ -127,3 +127,7 @@ To write webmagic, I refered to the projects below : ### Mail-list: [https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java) + + +[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/code4craft/webmagic/trend.png)](https://bitdeli.com/free "Bitdeli Badge") + diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java index 2ded0bdb..3aa23c75 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java @@ -126,7 +126,7 @@ public class FileCacheQueueScheduler implements Scheduler { private void readCursorFile() throws IOException { BufferedReader fileCursorReader = null; try { - new BufferedReader(new FileReader(getFileName(fileCursor))); + fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor))); String line; //read the last number while ((line = fileCursorReader.readLine()) != null) { diff --git a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java index 3d4fcdb8..50d332ba 100644 --- a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java +++ b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java @@ -84,6 +84,7 @@ public class SeleniumDownloader implements Downloader, Closeable { WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); Page page = new Page(); + page.setRawText(content); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); page.setRequest(request);