Refactored code for increased optimization. (#1139)
* Refactoring by the Decompose Conditional technique
* Refactoring by the Introduce Explaining Variable technique
* Refactoring by the Rename Method/Variable technique
* Refactoring by the Introduce Explaining Variable technique
* Added Extract Class refactoring to increase maintainability
* Refactoring using Replace Conditional with Polymorphism
pull/1153/head
parent
9b9f173c1c
commit
f051d978e2
@ -0,0 +1,53 @@
|
|||||||
|
package us.codecraft.webmagic.selector;
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ListIterator;
|
||||||
|
|
||||||
|
public class ElementsUtil {
|
||||||
|
HtmlNode htmlNode = new HtmlNode();
|
||||||
|
public Selectable selectElements(BaseElementSelector elementSelector) {
|
||||||
|
ListIterator<Element> elementIterator = htmlNode.getElements().listIterator();
|
||||||
|
if (!elementSelector.hasAttribute()) {
|
||||||
|
List<Element> resultElements = new ArrayList<Element>();
|
||||||
|
while (elementIterator.hasNext()) {
|
||||||
|
Element element = checkElementAndConvert(elementIterator);
|
||||||
|
List<Element> selectElements = elementSelector.selectElements(element);
|
||||||
|
resultElements.addAll(selectElements);
|
||||||
|
}
|
||||||
|
return new HtmlNode(resultElements);
|
||||||
|
} else {
|
||||||
|
// has attribute, consider as plaintext
|
||||||
|
List<String> resultStrings = new ArrayList<String>();
|
||||||
|
while (elementIterator.hasNext()) {
|
||||||
|
Element element = checkElementAndConvert(elementIterator);
|
||||||
|
List<String> selectList = elementSelector.selectList(element);
|
||||||
|
resultStrings.addAll(selectList);
|
||||||
|
}
|
||||||
|
return new PlainText(resultStrings);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only document can be select
|
||||||
|
* See: https://github.com/code4craft/webmagic/issues/113
|
||||||
|
*
|
||||||
|
* @param elementIterator elementIterator
|
||||||
|
* @return element element
|
||||||
|
*/
|
||||||
|
public Element checkElementAndConvert(ListIterator<Element> elementIterator) {
|
||||||
|
Element element = elementIterator.next();
|
||||||
|
if (!(element instanceof Document)) {
|
||||||
|
Document root = new Document(element.ownerDocument().baseUri());
|
||||||
|
Element clone = element.clone();
|
||||||
|
root.appendChild(clone);
|
||||||
|
elementIterator.set(root);
|
||||||
|
return root;
|
||||||
|
}
|
||||||
|
return element;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
package us.codecraft.webmagic.configurable;
|
||||||
|
|
||||||
|
import us.codecraft.webmagic.selector.JsonPathSelector;
|
||||||
|
import us.codecraft.webmagic.selector.Selector;
|
||||||
|
|
||||||
|
import static us.codecraft.webmagic.selector.Selectors.*;
|
||||||
|
public interface SelectorFactory {
|
||||||
|
Selector compileSelector(String expressionValue, String[] expressionParams);
|
||||||
|
}
|
||||||
|
|
||||||
|
class CssSelectorFactory implements SelectorFactory {
|
||||||
|
@Override
|
||||||
|
public Selector compileSelector(String expressionValue, String[] expressionParams) {
|
||||||
|
if (expressionParams.length >= 1) {
|
||||||
|
return $(expressionValue, expressionParams[0]);
|
||||||
|
} else {
|
||||||
|
return $(expressionValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class XPathSelectorFactory implements SelectorFactory {
|
||||||
|
@Override
|
||||||
|
public Selector compileSelector(String expressionValue, String[] expressionParams) {
|
||||||
|
return xpath(expressionValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class RegexSelectorFactory implements SelectorFactory {
|
||||||
|
@Override
|
||||||
|
public Selector compileSelector(String expressionValue, String[] expressionParams) {
|
||||||
|
if (expressionParams.length >= 1) {
|
||||||
|
return regex(expressionValue, Integer.parseInt(expressionParams[0]));
|
||||||
|
} else {
|
||||||
|
return regex(expressionValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class JsonPathSelectorFactory implements SelectorFactory {
|
||||||
|
@Override
|
||||||
|
public Selector compileSelector(String expressionValue, String[] expressionParams) {
|
||||||
|
return new JsonPathSelector(expressionValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SelectorCompiler {
|
||||||
|
private final SelectorFactory selectorFactory;
|
||||||
|
|
||||||
|
public SelectorCompiler(SelectorFactory selectorFactory) {
|
||||||
|
this.selectorFactory = selectorFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Selector compileSelector(String expressionValue, String[] expressionParams) {
|
||||||
|
return selectorFactory.compileSelector(expressionValue, expressionParams);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue