From ade5714d5054ec7fddfd5b31e382513c066a222c Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 10 Aug 2013 18:52:27 +0800 Subject: [PATCH] add https support --- .../webmagic/downloader/HttpClientPool.java | 2 ++ .../webmagic/model/samples/GithubRepo.java | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java index 854f1e57..a412f745 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientPool.java @@ -8,6 +8,7 @@ import org.apache.http.client.params.CookiePolicy; import org.apache.http.conn.scheme.PlainSocketFactory; import org.apache.http.conn.scheme.Scheme; import org.apache.http.conn.scheme.SchemeRegistry; +import org.apache.http.conn.ssl.SSLSocketFactory; import org.apache.http.impl.client.BasicCookieStore; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.conn.PoolingClientConnectionManager; @@ -60,6 +61,7 @@ public class HttpClientPool { SchemeRegistry schemeRegistry = new SchemeRegistry(); schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); + schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory())); PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry); connectionManager.setMaxTotal(poolSize); diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java new file mode 100644 index 00000000..f752829c --- /dev/null +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java 
@@ -0,0 +1,23 @@ +package us.codecraft.webmagic.model.samples; + +import us.codecraft.webmagic.Site; +import us.codecraft.webmagic.model.ConsolePageModelPipeline; +import us.codecraft.webmagic.model.OOSpider; +import us.codecraft.webmagic.model.annotation.ExtractBy; +import us.codecraft.webmagic.model.annotation.TargetUrl; + +/** Sample page model for GitHub repository pages: the name field carries an XPath ExtractBy rule, and main runs an OOSpider from the code4craft GitHub start URL with a ConsolePageModelPipeline. + * @author code4crafter@gmail.com
+ * Date: 13-8-10
+ * Time: 6:37 PM
+ */ +@TargetUrl("https://github.com/code4craft/*") +public class GithubRepo { + + @ExtractBy("//h1[@class='entry-title']/strong/a/text()") + private String name; + + public static void main(String[] args) { + OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/"), new ConsolePageModelPipeline(), GithubRepo.class).run(); + } +}