[BugFix] Only one URL from sourceRegion can be extracted #107

pull/121/head
yihua.huang 11 years ago
parent 08fa3b01c1
commit b06aa489fb

@ -122,6 +122,16 @@ public class PlainText implements Selectable {
}
}
/**
 * Extracts content with a caller-supplied selector.
 * <p>
 * Delegates to the two-argument {@code select} overload, passing along
 * {@code strings} as the text to run the selector against.
 *
 * @param selector the custom selector to apply
 * @return whatever the two-argument {@code select} overload produces
 */
@Override
public Selectable select(Selector selector) {
    final Selectable selected = select(selector, strings);
    return selected;
}
/**
 * List-oriented counterpart of {@code select(Selector)}.
 * <p>
 * Delegates to the two-argument {@code selectList} overload, passing along
 * {@code strings} as the text to run the selector against.
 *
 * @param selector the custom selector to apply
 * @return whatever the two-argument {@code selectList} overload produces
 */
@Override
public Selectable selectList(Selector selector) {
    final Selectable selectedList = selectList(selector, strings);
    return selectedList;
}
@Override
public String toString() {
return get();

@ -128,4 +128,19 @@ public interface Selectable {
*/
public Selectable jsonPath(String jsonPath);
/**
 * Extract content using a custom {@link Selector}.
 *
 * @param selector the custom selector to apply to this object's content
 * @return the extraction result wrapped as a {@code Selectable}
 */
public Selectable select(Selector selector);
/**
 * Extract content using a custom {@link Selector}, list variant.
 * <p>
 * NOTE(review): presumably this keeps every match the selector finds,
 * whereas {@code select(Selector)} keeps a single one - confirm against
 * the implementing classes.
 *
 * @param selector the custom selector to apply to this object's content
 * @return the extraction result wrapped as a {@code Selectable}
 */
public Selectable selectList(Selector selector);
}

@ -7,9 +7,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selector;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -66,7 +64,7 @@ class ModelPageProcessor implements PageProcessor {
if (urlRegionSelector == null) {
links = page.getHtml().links().all();
} else {
links = urlRegionSelector.selectList(page.getHtml().toString());
links = page.getHtml().selectList(urlRegionSelector).links().all();
}
for (String link : links) {
for (Pattern targetUrlPattern : urlPatterns) {

Loading…
Cancel
Save