add more options to extractors

pull/17/head
yihua.huang 12 years ago
parent c70ed57025
commit 20705b34ac

@ -19,10 +19,17 @@ public class CssSelector implements Selector {
private String selectorText;
private String attrName;
/**
 * Creates a selector for the given CSS expression without an attribute name.
 *
 * @param selectorText CSS selector expression used when querying documents
 */
public CssSelector(String selectorText) {
    // attrName is left null, which makes getValue() return the outer HTML of a match.
    this(selectorText, (String) null);
}
/**
 * Creates a selector for the given CSS expression that reads a specific attribute.
 *
 * @param selectorText CSS selector expression used when querying documents
 * @param attrName     name of the attribute to read from matched elements;
 *                     when {@code null}, the outer HTML of the element is used instead
 */
public CssSelector(String selectorText, String attrName) {
    this.attrName = attrName;
    this.selectorText = selectorText;
}
@Override
public String select(String text) {
Document doc = Jsoup.parse(text);
@ -30,7 +37,15 @@ public class CssSelector implements Selector {
if (CollectionUtils.isEmpty(elements)) {
return null;
}
return elements.get(0).outerHtml();
return getValue(elements.get(0));
}
/**
 * Extracts the result string from a matched element.
 *
 * @param element element matched by the selector
 * @return the value of the configured attribute when {@code attrName} is set,
 *         otherwise the outer HTML of the element
 */
private String getValue(Element element) {
    // An attribute name was configured: return that attribute rather than the markup.
    if (attrName != null) {
        return element.attr(attrName);
    }
    return element.outerHtml();
}
@Override
@ -40,7 +55,10 @@ public class CssSelector implements Selector {
Elements elements = doc.select(selectorText);
if (CollectionUtils.isNotEmpty(elements)) {
for (Element element : elements) {
strings.add(element.outerHtml());
String value = getValue(element);
if (value != null) {
strings.add(value);
}
}
}
return strings;

@ -69,4 +69,10 @@ public class Html extends PlainText {
return selectList(cssSelector, strings);
}
/**
 * Applies a CSS selector that reads the given attribute to the held strings.
 *
 * @param selector css selector expression
 * @param attrName attribute name passed on to the {@code CssSelector}
 * @return the result of {@code selectList} for the newly built selector
 */
@Override
public Selectable $(String selector, String attrName) {
    return selectList(new CssSelector(selector, attrName), strings);
}
}

@ -40,6 +40,11 @@ public class PlainText implements Selectable {
throw new UnsupportedOperationException();
}
/**
 * CSS selection with an attribute name is not supported by this class.
 *
 * @param selector css selector expression (unused)
 * @param attrName attribute name (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Selectable $(String selector, String attrName) {
throw new UnsupportedOperationException();
}
@Override
public Selectable smartContent() {
throw new UnsupportedOperationException();
@ -56,6 +61,12 @@ public class PlainText implements Selectable {
return selectList(regexSelector, strings);
}
/**
 * Applies a regex selector built for the given group to the held strings.
 *
 * @param regex regular expression handed to the selector factory
 * @param group group argument handed to the selector factory
 * @return the result of {@code selectList} for the newly built selector
 */
@Override
public Selectable regex(String regex, int group) {
    return selectList(SelectorFactory.getInstatnce().newRegexSelector(regex, group), strings);
}
protected Selectable select(Selector selector, List<String> strings) {
List<String> results = new ArrayList<String>();
for (String string : strings) {

@ -26,6 +26,15 @@ public interface Selectable {
*/
public Selectable $(String selector);
/**
 * Select a list of results with a css selector, extracting the named
 * attribute of each match.
 *
 * @param selector css selector expression
 * @param attrName name of the attribute to extract from each matched element
 * @return new Selectable after extraction
 * @throws UnsupportedOperationException if the implementation does not
 *         support css selection
 */
public Selectable $(String selector, String attrName);
/**
* select smart content with ReadAbility algorithm
*
@ -41,13 +50,22 @@ public interface Selectable {
public Selectable links();
/**
 * Select a list of results with a regex; the default group is group 1.
 *
 * @param regex regular expression to apply
 * @return new Selectable after extraction
 */
public Selectable regex(String regex);
/**
 * Select a list of results with a regex, using an explicitly chosen group.
 *
 * @param regex regular expression to apply
 * @param group group to extract; presumably the index of the regex capturing
 *              group (TODO confirm against RegexSelector)
 * @return new Selectable after extraction
 */
public Selectable regex(String regex, int group);
/**
* replace with regex
*

@ -26,6 +26,10 @@ public class SelectorFactory {
return newSelector(RegexSelector.class, regex);
}
/**
 * Obtains a {@code RegexSelector} for the given expression and group.
 *
 * @param regex regular expression for the selector
 * @param group group number; passed to {@code newSelector} in its decimal string form
 * @return the selector returned by {@code newSelector}
 */
public RegexSelector newRegexSelector(String regex, int group) {
    // newSelector is called with string arguments, so the group is converted first.
    String groupText = Integer.toString(group);
    return newSelector(RegexSelector.class, regex, groupText);
}
/**
 * Obtains a {@code ReplaceSelector} for the given expression and replacement.
 *
 * @param regex       regular expression for the selector
 * @param replacement replacement text for the selector
 * @return the selector returned by {@code newSelector}
 */
public ReplaceSelector newReplaceSelector(String regex, String replacement) {
    ReplaceSelector replaceSelector = newSelector(ReplaceSelector.class, regex, replacement);
    return replaceSelector;
}

Loading…
Cancel
Save