Add XPath 2.0 API

pull/17/head
yihua.huang 12 years ago
parent 5c96407a3d
commit 36494bcfa5

@ -7,25 +7,18 @@ import java.util.Map;
/**
* <br>
*
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 1:45
* Date: 13-4-21
* Time: 1:45
*/
public class ConsolePipeline implements Pipeline{
public class ConsolePipeline implements Pipeline {
@Override
public void process(ResultItems resultItems,Task task) {
System.out.println("get page: "+resultItems.getRequest().getUrl());
public void process(ResultItems resultItems, Task task) {
System.out.println("get page: " + resultItems.getRequest().getUrl());
for (Map.Entry<String, Object> entry : resultItems.getAll().entrySet()) {
if (entry.getValue() instanceof Iterable) {
Iterable value = (Iterable) entry.getValue();
System.out.println(entry.getKey() + ":");
for (Object o : value) {
System.out.println(o);
}
} else {
System.out.println(entry.getKey() + ":\t" + entry.getValue());
}
System.out.println(entry.getKey() + ":\t" + entry.getValue());
}
}
}

@ -63,6 +63,12 @@ public class Html extends PlainText {
return selectList(xpathSelector, strings);
}
/**
* Selects from this document using an XPath 2.0 expression.
* <p>
* An {@code Xpath2Selector} for the expression is requested from
* {@code SelectorFactory} and handed, together with this object's
* {@code strings}, to {@code selectList}.
*
* @param xpath the XPath 2.0 expression to apply
* @return whatever {@code selectList} produces for the selector and {@code strings}
*/
@Override
public Selectable xpath2(String xpath) {
    return selectList(SelectorFactory.getInstatnce().newXpath2Selector(xpath), strings);
}
@Override
public Selectable $(String selector) {
CssSelector cssSelector = new CssSelector(selector);

@ -34,6 +34,11 @@ public class PlainText implements Selectable {
throw new UnsupportedOperationException();
}
/**
* XPath 2.0 selection is not available on this implementation; the call
* always fails.
*
* @param xpath the XPath 2.0 expression (ignored)
* @return never returns normally
* @throws UnsupportedOperationException always
*/
@Override
public Selectable xpath2(String xpath) {
throw new UnsupportedOperationException();
}
@Override
public Selectable $(String selector) {
throw new UnsupportedOperationException();

@ -18,6 +18,14 @@ public interface Selectable {
*/
public Selectable xpath(String xpath);
/**
* Selects a list of results using XPath 2.0 syntax.
*
* @param xpath the XPath 2.0 expression to apply
* @return a new Selectable holding the extracted result
*/
public Selectable xpath2(String xpath);
/**
* select list with css selector
*

@ -34,6 +34,10 @@ public class SelectorFactory {
return newSelector(XpathSelector.class, xpath);
}
/**
* Obtains an {@code Xpath2Selector} for the given expression by delegating
* to {@code newSelector} with {@code Xpath2Selector.class} and the expression.
*
* @param xpath the expression passed through to {@code newSelector}
* @return the selector returned by {@code newSelector}
*/
public Xpath2Selector newXpath2Selector(String xpath) {
return newSelector(Xpath2Selector.class, xpath);
}
/**
* Obtains a {@code SmartContentSelector} by delegating to {@code newSelector}
* with {@code SmartContentSelector.class} and no further arguments.
*
* @return the selector returned by {@code newSelector}
*/
public SmartContentSelector newSmartContentSelector(){
return newSelector(SmartContentSelector.class);
}

@ -1,9 +1,10 @@
package us.codecraft.webmagic.schedular;
package us.codecraft.webmagic.scheduler;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.schedular.Scheduler;
import java.io.*;
import java.util.LinkedHashSet;

@ -4,7 +4,6 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.ExtractBy;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.TargetUrl;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.JsonFilePipeline;
/**
@ -30,7 +29,7 @@ public class OschinaBlog implements Blog{
}
public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(new ConsolePipeline()).pipeline(new JsonFilePipeline()).run();
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(new JsonFilePipeline()).run();
}
public String getTitle() {

@ -3,7 +3,7 @@ package us.codecraft.webmagic.samples;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.SimplePageProcessor;
import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
/**
* @author code4crafter@gmail.com <br>

@ -5,7 +5,7 @@ import org.junit.Test;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.SimplePageProcessor;
import us.codecraft.webmagic.samples.HuxiuProcessor;
import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
/**
* @author code4crafter@gmail.com <br>

@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
import us.codecraft.webmagic.samples.DiandianBlogProcessor;
import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.io.IOException;

@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
import us.codecraft.webmagic.samples.DiaoyuwengProcessor;
import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.io.IOException;

@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
import us.codecraft.webmagic.samples.SinaBlogProcesser;
import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.io.IOException;

Loading…
Cancel
Save