Add JokejiModel as an example
parent
f094f31816
commit
ca29cd3ef2
@ -0,0 +1,29 @@
|
|||||||
|
package us.codecraft.webmagic.model.samples;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.Site;
|
||||||
|
import us.codecraft.webmagic.model.ConsolePageModelPipeline;
|
||||||
|
import us.codecraft.webmagic.model.OOSpider;
|
||||||
|
import us.codecraft.webmagic.model.annotation.ExtractBy;
|
||||||
|
import us.codecraft.webmagic.model.annotation.HelpUrl;
|
||||||
|
import us.codecraft.webmagic.model.annotation.TargetUrl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author code4crafter@gmail.com
|
||||||
|
*/
|
||||||
|
@TargetUrl("http://www.jokeji.cn/jokehtml/jy/\\d+.htm")
|
||||||
|
@HelpUrl("http://www.jokeji.cn/list\\w+.htm")
|
||||||
|
public class JokejiModel {
|
||||||
|
|
||||||
|
@ExtractBy("//title/regex('<title>([^_]+)',1)")
|
||||||
|
private String title;
|
||||||
|
|
||||||
|
@ExtractBy("//div[@class=mob_txt]/tidyText()")
|
||||||
|
private String content;
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
OOSpider.create(Site.me().setDomain("www.jokeji.cn").setCharset("gbk").setSleepTime(100).setTimeOut(3000)
|
||||||
|
.setUserAgent("Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)")
|
||||||
|
, new ConsolePageModelPipeline(), JokejiModel.class).addUrl("http://www.jokeji.cn/").thread(2).run();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue