diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..0cecd852 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file 
distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "{}" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013 code4craft + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/README-zh.md b/README-zh.md index d69dd63f..e8f07355 100644 --- a/README-zh.md +++ b/README-zh.md @@ -168,30 +168,6 @@ webmagic的使用可以参考:[oschina openapi 应用:博客搬家](http://m webmagic遵循[Apache 2.0协议](http://opensource.org/licenses/Apache-2.0) -### 贡献者: - -以下是为WebMagic提交过代码或者issue的朋友: - -* [ccliangbo](https://github.com/ccliangbo) -* [yuany](https://github.com/yuany) -* [yxssfxwzy](https://github.com/yxssfxwzy) -* [linkerlin](https://github.com/linkerlin) -* [d0ngw](https://github.com/d0ngw) -* [xuchaoo](https://github.com/xuchaoo) -* [supermicah](https://github.com/supermicah) -* [SimpleExpress](https://github.com/SimpleExpress) -* [aruanruan](https://github.com/aruanruan) -* [l1z2g9](https://github.com/l1z2g9) -* [zhegexiaohuozi](https://github.com/zhegexiaohuozi) -* [ywooer](https://github.com/ywooer) -* [yyw258520](https://github.com/yyw258520) -* [perfecking](https://github.com/perfecking) -* [lidongyang](http://my.oschina.net/lidongyang) -* [seveniu](https://github.com/seveniu) -* [sebastian1118](https://github.com/sebastian1118) -* [codev777](https://github.com/codev777) -* [fengwuze](https://github.com/fengwuze) - ### 邮件组: Gmail: diff --git a/README.md b/README.md index 285eb609..87858443 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ public class GithubRepoPageProcessor implements PageProcessor { public void process(Page page) { page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all()); page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString()); - page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString()); + page.putField("name", page.getHtml().xpath("//h1[@class='public']/strong/a/text()").toString()); if (page.getResultItems().get("name")==null){ //skip this page page.setSkip(true); @@ -89,7 +89,7 @@ You can also use annotation way: @HelpUrl("https://github.com/\\w+") public class GithubRepo { - 
@ExtractBy(value = "//h1[@class='entry-title public']/strong/a/text()", notNull = true) + @ExtractBy(value = "//h1[@class='public']/strong/a/text()", notNull = true) private String name; @ExtractByUrl("https://github\\.com/(\\w+)/.*") @@ -114,39 +114,12 @@ The architecture of webmagic (refered to [Scrapy](http://scrapy.org/)) ![image](http://code4craft.github.io/images/posts/webmagic.png) -Javadocs: [http://code4craft.github.io/webmagic/docs/en/](http://code4craft.github.io/webmagic/docs/en/) - -There are some samples in `webmagic-samples` package. +There are more examples in `webmagic-samples` package. ### Lisence: Lisenced under [Apache 2.0 lisence](http://opensource.org/licenses/Apache-2.0) -### Contributors: - -Thanks these people for commiting source code, reporting bugs or suggesting for new feature: - -* [ccliangbo](https://github.com/ccliangbo) -* [yuany](https://github.com/yuany) -* [yxssfxwzy](https://github.com/yxssfxwzy) -* [linkerlin](https://github.com/linkerlin) -* [d0ngw](https://github.com/d0ngw) -* [xuchaoo](https://github.com/xuchaoo) -* [supermicah](https://github.com/supermicah) -* [SimpleExpress](https://github.com/SimpleExpress) -* [aruanruan](https://github.com/aruanruan) -* [l1z2g9](https://github.com/l1z2g9) -* [zhegexiaohuozi](https://github.com/zhegexiaohuozi) -* [ywooer](https://github.com/ywooer) -* [yyw258520](https://github.com/yyw258520) -* [perfecking](https://github.com/perfecking) -* [lidongyang](http://my.oschina.net/lidongyang) -* [seveniu](https://github.com/seveniu) -* [sebastian1118](https://github.com/sebastian1118) -* [codev777](https://github.com/codev777) -* [fengwuze](https://github.com/fengwuze) - - ### Thanks: To write webmagic, I refered to the projects below : diff --git a/pom.xml b/pom.xml index 0743c02b..04b6dec0 100644 --- a/pom.xml +++ b/pom.xml @@ -64,6 +64,12 @@ 4.11 test + + org.mockito + mockito-all + 1.10.19 + test + org.apache.httpcomponents httpclient @@ -97,7 +103,7 @@ com.alibaba fastjson - 1.2.21 + 
1.2.28 com.github.dreamhead @@ -130,7 +136,7 @@ commons-collections commons-collections - 3.2.1 + 3.2.2 org.apache.commons diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml index fbd5034c..ad969612 100644 --- a/webmagic-core/pom.xml +++ b/webmagic-core/pom.xml @@ -40,6 +40,11 @@ slf4j-api + + org.mockito + mockito-all + + org.slf4j slf4j-log4j12 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java index 62f21f8e..7c0064d1 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java @@ -107,14 +107,12 @@ public class Page { * @param requests requests */ public void addTargetRequests(List requests) { - synchronized (targetRequests) { - for (String s : requests) { - if (StringUtils.isBlank(s) || s.equals("#") || s.startsWith("javascript:")) { - continue; - } - s = UrlUtils.canonicalizeUrl(s, url.toString()); - targetRequests.add(new Request(s)); + for (String s : requests) { + if (StringUtils.isBlank(s) || s.equals("#") || s.startsWith("javascript:")) { + continue; } + s = UrlUtils.canonicalizeUrl(s, url.toString()); + targetRequests.add(new Request(s)); } } @@ -125,14 +123,12 @@ public class Page { * @param priority priority */ public void addTargetRequests(List requests, long priority) { - synchronized (targetRequests) { - for (String s : requests) { - if (StringUtils.isBlank(s) || s.equals("#") || s.startsWith("javascript:")) { - continue; - } - s = UrlUtils.canonicalizeUrl(s, url.toString()); - targetRequests.add(new Request(s).setPriority(priority)); + for (String s : requests) { + if (StringUtils.isBlank(s) || s.equals("#") || s.startsWith("javascript:")) { + continue; } + s = UrlUtils.canonicalizeUrl(s, url.toString()); + targetRequests.add(new Request(s).setPriority(priority)); } } @@ -145,10 +141,8 @@ public class Page { if (StringUtils.isBlank(requestString) || requestString.equals("#")) { 
return; } - synchronized (targetRequests) { - requestString = UrlUtils.canonicalizeUrl(requestString, url.toString()); - targetRequests.add(new Request(requestString)); - } + requestString = UrlUtils.canonicalizeUrl(requestString, url.toString()); + targetRequests.add(new Request(requestString)); } /** @@ -157,9 +151,7 @@ public class Page { * @param request request */ public void addTargetRequest(Request request) { - synchronized (targetRequests) { - targetRequests.add(request); - } + targetRequests.add(request); } /** diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java index 4d7f4270..c8c59782 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java @@ -85,27 +85,10 @@ public class Request implements Serializable { return url; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - Request request = (Request) o; - - if (!url.equals(request.url)) return false; - - return true; - } - public Map getExtras() { return extras; } - @Override - public int hashCode() { - return url.hashCode(); - } - public void setExtras(Map extras) { this.extras = extras; } @@ -132,23 +115,52 @@ public class Request implements Serializable { return params; } /** - * POST/GET参数设置 + * set params for request + *
+ * DO NOT set this for request already has params, like 'https://github.com/search?q=webmagic' + * @param params params * */ public void setParams(Map params) { this.params = params; } /** - * POST/GET参数设置 + * set params for request + *
+ * DO NOT set this for request already has params, like 'https://github.com/search?q=webmagic' + * @param key key + * @param value value * */ public void putParams(String key,String value) { params.put(key,value); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Request request = (Request) o; + + if (url != null ? !url.equals(request.url) : request.url != null) return false; + if (method != null ? !method.equals(request.method) : request.method != null) return false; + return params != null ? params.equals(request.params) : request.params == null; + } + + @Override + public int hashCode() { + int result = url != null ? url.hashCode() : 0; + result = 31 * result + (method != null ? method.hashCode() : 0); + result = 31 * result + (params != null ? params.hashCode() : 0); + return result; + } + @Override public String toString() { return "Request{" + "url='" + url + '\'' + ", method='" + method + '\'' + ", extras=" + extras + + ", params=" + params + ", priority=" + priority + '}'; } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index b1afb660..49734b7e 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -305,7 +305,7 @@ public class Spider implements Runnable, Task { initComponent(); logger.info("Spider " + getUUID() + " started!"); while (!Thread.currentThread().isInterrupted() && stat.get() == STAT_RUNNING) { - Request request = scheduler.poll(this); + final Request request = scheduler.poll(this); if (request == null) { if (threadPool.getThreadAlive() == 0 && exitWhenComplete) { break; @@ -313,16 +313,15 @@ public class Spider implements Runnable, Task { // wait until new url added waitNewUrl(); } else { - final Request requestFinal = request; threadPool.execute(new Runnable() { 
@Override public void run() { try { - processRequest(requestFinal); - onSuccess(requestFinal); + processRequest(request); + onSuccess(request); } catch (Exception e) { - onError(requestFinal); - logger.error("process request " + requestFinal + " error", e); + onError(request); + logger.error("process request " + request + " error", e); } finally { pageCount.incrementAndGet(); signalNewUrl(); @@ -587,6 +586,7 @@ public class Spider implements Runnable, Task { if (threadNum <= 0) { throw new IllegalArgumentException("threadNum should be more than one!"); } + this.executorService = executorService; return this; } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 6c1e89c0..9e77ef5f 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -1,7 +1,6 @@ package us.codecraft.webmagic.downloader; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpHost; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; @@ -15,10 +14,6 @@ import org.apache.http.client.methods.RequestBuilder; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Page; @@ -27,8 +22,8 @@ import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.selector.PlainText; +import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.HttpConstant; 
-import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.WMCollections; import java.io.IOException; @@ -98,8 +93,8 @@ public class HttpClientDownloader extends AbstractDownloader { proxyHost = site.getHttpProxy(); } - HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers, proxyHost);//���������˴��� - httpResponse = getHttpClient(site, proxy).execute(httpUriRequest);//getHttpClient�������˴�����֤ + HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers, proxyHost); + httpResponse = getHttpClient(site, proxy).execute(httpUriRequest); statusCode = httpResponse.getStatusLine().getStatusCode(); request.putExtra(Request.STATUS_CODE, statusCode); if (statusAccept(acceptStatCode, statusCode)) { @@ -167,39 +162,44 @@ public class HttpClientDownloader extends AbstractDownloader { String method = request.getMethod(); if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) { //default get - RequestBuilder requestBuilder=RequestBuilder.get(); - if (request.getParams() != null) { - for (Map.Entry entry : request.getParams().entrySet()) { - requestBuilder.addParameter(entry.getKey(), entry.getValue()); - } - } - return requestBuilder; + return addQueryParams(RequestBuilder.get(),request.getParams()); } else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) { - RequestBuilder requestBuilder = RequestBuilder.post(); - NameValuePair[] nameValuePair = (NameValuePair[]) request.getExtra("nameValuePair"); - List allNameValuePair=new ArrayList(); - if (nameValuePair != null && nameValuePair.length > 0) { - allNameValuePair= Arrays.asList(nameValuePair); - } - if (request.getParams() != null) { - for (String key : request.getParams().keySet()) { - allNameValuePair.add(new BasicNameValuePair(key, request.getParams().get(key))); - } - } - requestBuilder.setEntity(new UrlEncodedFormEntity(allNameValuePair, Charset.forName("utf8"))); - return requestBuilder; + return addFormParams(RequestBuilder.post(), 
(NameValuePair[]) request.getExtra("nameValuePair"), request.getParams());
         } else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
-            return RequestBuilder.head();
+            return addQueryParams(RequestBuilder.head(),request.getParams());
         } else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
-            return RequestBuilder.put();
+            return addFormParams(RequestBuilder.put(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams());
         } else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
-            return RequestBuilder.delete();
+            return addQueryParams(RequestBuilder.delete(),request.getParams());
         } else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
-            return RequestBuilder.trace();
+            return addQueryParams(RequestBuilder.trace(),request.getParams());
         }
         throw new IllegalArgumentException("Illegal HTTP Method " + method);
     }
 
+    private RequestBuilder addFormParams(RequestBuilder requestBuilder, NameValuePair[] nameValuePair, Map<String, String> params) {
+        List<NameValuePair> allNameValuePair = new ArrayList<NameValuePair>();
+        if (nameValuePair != null && nameValuePair.length > 0) {
+            allNameValuePair.addAll(Arrays.asList(nameValuePair)); // copy: Arrays.asList is fixed-size, so the add() below would throw
+        }
+        if (params != null) {
+            for (Map.Entry<String, String> entry : params.entrySet()) {
+                allNameValuePair.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
+            }
+        }
+        requestBuilder.setEntity(new UrlEncodedFormEntity(allNameValuePair, Charset.forName("utf8")));
+        return requestBuilder;
+    }
+
+    private RequestBuilder addQueryParams(RequestBuilder requestBuilder, Map<String, String> params) {
+        if (params != null) {
+            for (Map.Entry<String, String> entry : params.entrySet()) {
+                requestBuilder.addParameter(entry.getKey(), entry.getValue());
+            }
+        }
+        return requestBuilder;
+    }
+
     protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
         String content = getContent(charset, httpResponse);
         Page page = new Page();
@@ -226,40 +226,6 @@ public class HttpClientDownloader extends AbstractDownloader {
     }
 
     protected String getHtmlCharset(HttpResponse
httpResponse, byte[] contentBytes) throws IOException { - String charset; - // charset - // 1、encoding in http header Content-Type - String value = httpResponse.getEntity().getContentType().getValue(); - charset = UrlUtils.getCharset(value); - if (StringUtils.isNotBlank(charset)) { - logger.debug("Auto get charset: {}", charset); - return charset; - } - // use default charset to decode first time - Charset defaultCharset = Charset.defaultCharset(); - String content = new String(contentBytes, defaultCharset.name()); - // 2、charset in meta - if (StringUtils.isNotEmpty(content)) { - Document document = Jsoup.parse(content); - Elements links = document.select("meta"); - for (Element link : links) { - // 2.1、html4.01 - String metaContent = link.attr("content"); - String metaCharset = link.attr("charset"); - if (metaContent.indexOf("charset") != -1) { - metaContent = metaContent.substring(metaContent.indexOf("charset"), metaContent.length()); - charset = metaContent.split("=")[1]; - break; - } - // 2.2、html5 - else if (StringUtils.isNotEmpty(metaCharset)) { - charset = metaCharset; - break; - } - } - } - logger.debug("Auto get charset: {}", charset); - // 3、todo use tools as cpdetector for content decode - return charset; + return CharsetUtils.detectCharset(httpResponse.getEntity().getContentType().getValue(), contentBytes); } } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java index 955bd5a3..e93ab4cd 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/example/GithubRepoPageProcessor.java @@ -18,7 +18,7 @@ public class GithubRepoPageProcessor implements PageProcessor { page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)").all()); 
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/[\\w\\-])").all()); page.putField("author", page.getUrl().regex("https://github\\.com/(\\w+)/.*").toString()); - page.putField("name", page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString()); + page.putField("name", page.getHtml().xpath("//h1[@class='public']/strong/a/text()").toString()); if (page.getResultItems().get("name")==null){ //skip this page page.setSkip(true); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java index 26096715..dbe3a182 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java @@ -79,14 +79,14 @@ public class Proxy implements Delayed, Serializable { private List failedErrorType = new ArrayList(); - Proxy(HttpHost httpHost, String user, String password) { + public Proxy(HttpHost httpHost, String user, String password) { this.httpHost = httpHost; this.user = user; this.password = password; this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseTimeInterval, TimeUnit.MILLISECONDS); } - Proxy(HttpHost httpHost, int reuseInterval, String user, String password) { + public Proxy(HttpHost httpHost, int reuseInterval, String user, String password) { this.httpHost = httpHost; this.user = user; this.password = password; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java index 9be7adb5..ecbeecb6 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java @@ -6,6 +6,7 @@ import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; import 
us.codecraft.webmagic.scheduler.component.DuplicateRemover; import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover; +import us.codecraft.webmagic.utils.HttpConstant; /** * Remove duplicate urls and only push urls which are not duplicate.

@@ -31,7 +32,7 @@ public abstract class DuplicateRemovedScheduler implements Scheduler { @Override public void push(Request request, Task task) { logger.trace("get a candidate url {}", request.getUrl()); - if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request)) { + if (shouldReserved(request) || noNeedToRemoveDuplicate(request) || !duplicatedRemover.isDuplicate(request, task)) { logger.debug("push to queue {}", request.getUrl()); pushWhenNoDuplicate(request, task); } @@ -41,6 +42,10 @@ public abstract class DuplicateRemovedScheduler implements Scheduler { return request.getExtra(Request.CYCLE_TRIED_TIMES) != null; } + protected boolean noNeedToRemoveDuplicate(Request request) { + return HttpConstant.Method.POST.equalsIgnoreCase(request.getMethod()); + } + protected void pushWhenNoDuplicate(Request request, Task task) { } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java index c38311f2..078506c6 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java @@ -26,7 +26,7 @@ public class QueueScheduler extends DuplicateRemovedScheduler implements Monitor } @Override - public synchronized Request poll(Task task) { + public Request poll(Task task) { return queue.poll(); } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java index 43818965..584cf900 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java @@ -28,8 +28,7 @@ public class RegexSelector implements Selector { } // Check bracket for regex group. Add default group 1 if there is no group. 
// Only check if there exists the valid left parenthesis, leave regexp validation for Pattern. - if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") == - StringUtils.countMatches(regexStr, "(?:") - StringUtils.countMatches(regexStr, "\\(?:")) { + if ( ! hasGroup(regexStr) ){ regexStr = "(" + regexStr + ")"; } this.regexStr = regexStr; @@ -45,6 +44,30 @@ public class RegexSelector implements Selector { this(regexStr, 1); } + private boolean hasGroup(String regexStr) { + if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") == + StringUtils.countMatches(regexStr, "(?:") - StringUtils.countMatches(regexStr, "\\(?:")){ + return false; + } + if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") == + StringUtils.countMatches(regexStr, "(?=") - StringUtils.countMatches(regexStr, "\\(?=") ) { + return false; + } + if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") == + StringUtils.countMatches(regexStr, "(?<") - StringUtils.countMatches(regexStr, "\\(?<") ) { + return false; + } + if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") == + StringUtils.countMatches(regexStr, "(?!") - StringUtils.countMatches(regexStr, "\\(?!") ) { + return false; + } + if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") == + StringUtils.countMatches(regexStr, "(?#") - StringUtils.countMatches(regexStr, "\\(?#") ) { + return false; + } + return true; + } + @Override public String select(String text) { return selectGroup(text).get(group); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/CharsetUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/CharsetUtils.java new file mode 100644 index 00000000..50b4f1b6 --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/CharsetUtils.java @@ -0,0 +1,61 @@ +package 
us.codecraft.webmagic.utils; + +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.Charset; + +/** + * @author code4crafter@gmail.com + * Date: 17/3/11 + * Time: 10:36 + * @since 0.6.2 + */ +public abstract class CharsetUtils { + + private static Logger logger = LoggerFactory.getLogger(CharsetUtils.class); + + public static String detectCharset(String contentType, byte[] contentBytes) throws IOException { + String charset; + // charset + // 1、encoding in http header Content-Type; only trusted when a charset was actually parsed, otherwise fall through to the meta lookup + charset = UrlUtils.getCharset(contentType); + if (StringUtils.isNotBlank(charset)) { + logger.debug("Auto get charset: {}", charset); + return charset; + } + // use default charset to decode first time + Charset defaultCharset = Charset.defaultCharset(); + String content = new String(contentBytes, defaultCharset); + // 2、charset in meta + if (StringUtils.isNotEmpty(content)) { + Document document = Jsoup.parse(content); + Elements links = document.select("meta"); + for (Element link : links) { + // 2.1、html4.01 + String metaContent = link.attr("content"); + String metaCharset = link.attr("charset"); + if (metaContent.indexOf("charset") != -1) { + metaContent = metaContent.substring(metaContent.indexOf("charset"), metaContent.length()); + charset = metaContent.split("=")[1]; + break; + } + // 2.2、html5 + else if (StringUtils.isNotEmpty(metaCharset)) { + charset = metaCharset; + break; + } + } + } + logger.debug("Auto get charset: {}", charset); + // 3、todo use tools as cpdetector for content decode + return charset; + } + +} diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/RequestTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/RequestTest.java new file mode 100644 index 00000000..c7e4943d --- /dev/null +++
b/webmagic-core/src/test/java/us/codecraft/webmagic/RequestTest.java @@ -0,0 +1,25 @@ +package us.codecraft.webmagic; + +import org.junit.Test; +import us.codecraft.webmagic.utils.HttpConstant; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * @author code4crafter@gmail.com + * Date: 17/3/11 + */ +public class RequestTest { + + @Test + public void testEqualsAndHashCode() throws Exception { + Request requestA = new Request("http://www.google.com/"); + Request requestB = new Request("http://www.google.com/"); + assertThat(requestA.hashCode()).isEqualTo(requestB.hashCode()); + assertThat(requestA).isEqualTo(requestB); + requestA.setMethod(HttpConstant.Method.GET); + requestA.setMethod(HttpConstant.Method.POST); + assertThat(requestA).isNotEqualTo(requestB); + assertThat(requestA.hashCode()).isNotEqualTo(requestB.hashCode()); + } +} diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java index 1735e00b..0e442a87 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java @@ -5,13 +5,17 @@ import com.github.dreamhead.moco.Runnable; import com.github.dreamhead.moco.Runner; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.RequestBuilder; import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; import org.junit.Test; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.selector.Html; +import us.codecraft.webmagic.utils.HttpConstant; import java.io.IOException; import 
java.io.UnsupportedEncodingException; @@ -103,4 +107,42 @@ public class HttpClientDownloaderTest { } }); } + + @Test + public void test_selectRequestMethod() throws Exception { + HttpServer server = httpserver(12306); + server.get(eq(query("q"), "webmagic")).response("get"); + server.post(eq(form("q"), "webmagic")).response("post"); + server.put(eq(form("q"), "webmagic")).response("put"); + server.delete(eq(query("q"), "webmagic")).response("delete"); + server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head")); + server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace"); + Runner.running(server, new Runnable() { + @Override + public void run() throws Exception { + HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); + Request request = new Request(); + request.setUrl("http://127.0.0.1:12306/search"); + request.putParams("q", "webmagic"); + request.setMethod(HttpConstant.Method.GET); + RequestBuilder requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); + assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("get"); + request.setMethod(HttpConstant.Method.POST); + requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); + assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("post"); + request.setMethod(HttpConstant.Method.PUT); + requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); + assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("put"); + request.setMethod(HttpConstant.Method.DELETE); + requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); + 
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("delete"); + request.setMethod(HttpConstant.Method.HEAD); + requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); + assertThat(HttpClients.custom().build().execute(requestBuilder.build()).getFirstHeader("method").getValue()).isEqualTo("head"); + request.setMethod(HttpConstant.Method.TRACE); + requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); + assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("trace"); + } + }); + } } diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/DuplicateRemovedSchedulerTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/DuplicateRemovedSchedulerTest.java new file mode 100644 index 00000000..a0980494 --- /dev/null +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/scheduler/DuplicateRemovedSchedulerTest.java @@ -0,0 +1,50 @@ +package us.codecraft.webmagic.scheduler; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.mockito.runners.MockitoJUnitRunner; +import us.codecraft.webmagic.Request; +import us.codecraft.webmagic.Task; +import us.codecraft.webmagic.scheduler.component.DuplicateRemover; +import us.codecraft.webmagic.utils.HttpConstant; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +/** + * @author code4crafter@gmail.com + * Date: 17/3/11 + * Time: 上午11:26 + */ +@RunWith(MockitoJUnitRunner.class) +public class DuplicateRemovedSchedulerTest { + + private DuplicateRemovedScheduler duplicateRemovedScheduler = new DuplicateRemovedScheduler() { + @Override + public Request poll(Task task) { + return null; + } + }; + + @Test + public void test_no_duplicate_removed_for_post_request() throws Exception { + 
DuplicateRemover duplicateRemover = Mockito.mock(DuplicateRemover.class); + duplicateRemovedScheduler.setDuplicateRemover(duplicateRemover); + Request request = new Request("https://www.google.com/"); + request.setMethod(HttpConstant.Method.POST); + duplicateRemovedScheduler.push(request, null); + verify(duplicateRemover,times(0)).isDuplicate(any(Request.class),any(Task.class)); + } + + @Test + public void test_duplicate_removed_for_get_request() throws Exception { + DuplicateRemover duplicateRemover = Mockito.mock(DuplicateRemover.class); + duplicateRemovedScheduler.setDuplicateRemover(duplicateRemover); + Request request = new Request("https://www.google.com/"); + request.setMethod(HttpConstant.Method.GET); + duplicateRemovedScheduler.push(request, null); + verify(duplicateRemover,times(1)).isDuplicate(any(Request.class),any(Task.class)); + } +} diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/RegexSelectorTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/RegexSelectorTest.java index 63e8e43b..144e6fe2 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/selector/RegexSelectorTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/selector/RegexSelectorTest.java @@ -22,4 +22,20 @@ public class RegexSelectorTest { String select = regexSelector.select(source); Assertions.assertThat(select).isEqualTo(source); } + + @Test + public void testRegexWithZeroWidthAssertions() { + String regex = "^.*(?=\\?)"; + String source = "hello world?xxxx"; + RegexSelector regexSelector = new RegexSelector(regex); + String select = regexSelector.select(source); + Assertions.assertThat(select).isEqualTo("hello world"); + + + regex = "\\d{3}(?!\\d)"; + source = "123456asdf"; + regexSelector = new RegexSelector(regex); + select = regexSelector.select(source); + Assertions.assertThat(select).isEqualTo("456"); + } } diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java 
b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java index 86b9db35..a90304dc 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/UrlUtilsTest.java @@ -20,6 +20,9 @@ public class UrlUtilsTest { absoluteUrl = UrlUtils.canonicalizeUrl("../aa", "http://www.dianping.com/sh/ss/com"); assertThat(absoluteUrl).isEqualTo("http://www.dianping.com/sh/aa"); + absoluteUrl = UrlUtils.canonicalizeUrl("../mshz", "http://www.court.gov.cn/zgcpwsw/zgrmfy/"); + assertThat(absoluteUrl).isEqualTo("http://www.court.gov.cn/zgcpwsw/mshz"); + absoluteUrl = UrlUtils.canonicalizeUrl("..aa", "http://www.dianping.com/sh/ss/com"); assertThat(absoluteUrl).isEqualTo("http://www.dianping.com/sh/ss/..aa"); diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java index 61551b13..59f4b3f4 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java @@ -48,11 +48,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor public boolean isDuplicate(Request request, Task task) { Jedis jedis = pool.getResource(); try { - boolean isDuplicate = jedis.sismember(getSetKey(task), request.getUrl()); - if (!isDuplicate) { - jedis.sadd(getSetKey(task), request.getUrl()); - } - return isDuplicate; + return jedis.sadd(getSetKey(task), request.getUrl()) == 0; /* SADD reports how many members were newly added; 0 means the URL was already in the set, i.e. a duplicate */ } finally { pool.returnResource(jedis); } diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index 6ddc61cf..b66ca0cf 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -13,7 +13,7 @@ org.seleniumhq.selenium selenium-java - 2.46.0 + 2.41.0 us.codecraft diff --git
a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java index 59f83ea5..1472cb32 100644 --- a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java +++ b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java @@ -45,7 +45,7 @@ class WebDriverPool { private WebDriver mDriver = null; private boolean mAutoQuitDriver = true; - private static final String CONFIG_FILE = "/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/config.ini"; + private static final String DEFAULT_CONFIG_FILE = "/data/webmagic/webmagic-selenium/config.ini"; private static final String DRIVER_FIREFOX = "firefox"; private static final String DRIVER_CHROME = "chrome"; private static final String DRIVER_PHANTOMJS = "phantomjs"; @@ -64,7 +64,11 @@ class WebDriverPool { public void configure() throws IOException { // Read config file sConfig = new Properties(); - sConfig.load(new FileReader(CONFIG_FILE)); + String configFile = DEFAULT_CONFIG_FILE; + if (System.getProperty("selenuim_config")!=null){ + configFile = System.getProperty("selenuim_config"); + } + sConfig.load(new FileReader(configFile)); // Prepare capabilities sCaps = new DesiredCapabilities(); diff --git a/webmagic-selenium/src/test/java/us/codecraft/webmagic/samples/HuabanProcessor.java b/webmagic-selenium/src/test/java/us/codecraft/webmagic/samples/HuabanProcessor.java index 2854a766..ad3a3e5b 100644 --- a/webmagic-selenium/src/test/java/us/codecraft/webmagic/samples/HuabanProcessor.java +++ b/webmagic-selenium/src/test/java/us/codecraft/webmagic/samples/HuabanProcessor.java @@ -22,7 +22,7 @@ public class HuabanProcessor implements PageProcessor { public void process(Page page) { page.addTargetRequests(page.getHtml().links().regex("http://huaban\\.com/.*").all()); if (page.getUrl().toString().contains("pins")) { - 
page.putField("img", page.getHtml().xpath("//div[@id='pin_img']/a/img/@src").toString()); + page.putField("img", page.getHtml().xpath("//div[@class='image-holder']/a/img/@src").toString()); } else { page.getResultItems().setSkip(true); } diff --git a/webmagic-selenium/src/test/resources/config.ini b/webmagic-selenium/src/test/resources/config.ini new file mode 100644 index 00000000..6bd19af1 --- /dev/null +++ b/webmagic-selenium/src/test/resources/config.ini @@ -0,0 +1,11 @@ +#driver=phantomjs +#driver=firefox +driver=chrome +#driver=http://localhost:8910 +driver=http://localhost:4444/wd/hub + +# PhantomJS specific config (change according to your installation) +#phantomjs_exec_path=/Users/detro/bin/phantomjs-qt5 +phantomjs_exec_path=/Users/detro/bin/phantomjs-upstream +phantomjs_driver_path=../../src/main.js +phantomjs_driver_loglevel=DEBUG \ No newline at end of file