修改SmartContentSelector threshold可定制化 (#1183)

* 修改SmartContentSelector threshold可定制化

* 修改SmartContentSelector threshold可定制化

---------

Co-authored-by: zhaoyiwei <zhaoyiwei@zhongan.com>
pull/1186/head
zyw61483 2 months ago committed by GitHub
parent 8dc417452a
commit 9bb2417f58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -31,6 +31,11 @@ public class HtmlNode extends AbstractSelectable {
return select(smartContentSelector, getSourceTexts());
}
/**
 * Applies a {@link SmartContentSelector} built with a caller-supplied threshold
 * to this node's source texts.
 *
 * @param threshold value handed to {@code Selectors.smartContent(int)} when
 *                  building the selector
 * @return whatever {@code select} yields for that selector and
 *         {@code getSourceTexts()}
 */
public Selectable smartContent(int threshold) {
    return select(Selectors.smartContent(threshold), getSourceTexts());
}
@Override
public Selectable links() {
return selectElements(new LinksSelector());

@ -20,6 +20,10 @@ public abstract class Selectors {
return new SmartContentSelector();
}
/**
 * Factory for a {@link SmartContentSelector} that uses the given threshold
 * instead of the selector's built-in default.
 *
 * @param threshold value passed straight to the selector's constructor
 * @return a newly constructed selector
 */
public static SmartContentSelector smartContent(int threshold) {
    SmartContentSelector selector = new SmartContentSelector(threshold);
    return selector;
}
/**
 * Shorthand factory that wraps an expression in a {@link CssSelector}.
 *
 * @param expr expression passed unchanged to the {@code CssSelector} constructor
 * @return a newly constructed selector for {@code expr}
 */
public static CssSelector $(String expr) {
    CssSelector selector = new CssSelector(expr);
    return selector;
}

@ -16,9 +16,15 @@ import java.util.List;
@Experimental
public class SmartContentSelector implements Selector {
/** Threshold used when the caller does not supply one. */
private static final int DEFAULT_THRESHOLD = 86;

/**
 * Threshold consulted by this selector; always assigned by a constructor.
 * NOTE(review): its exact role is determined by {@code select}, which is not
 * fully visible here - confirm before documenting further.
 */
private int threshold;

/**
 * Creates a selector that uses the default threshold of 86.
 */
public SmartContentSelector() {
    this(DEFAULT_THRESHOLD);
}

/**
 * Creates a selector that uses a caller-supplied threshold.
 *
 * @param threshold value stored as this selector's threshold; it is not
 *                  validated here
 */
public SmartContentSelector(int threshold) {
    this.threshold = threshold;
}
@Override
public String select(String html) {
html = html.replaceAll("(?is)<!DOCTYPE.*?>", "");
@ -29,7 +35,6 @@ public class SmartContentSelector implements Selector {
html = html.replaceAll("(?is)<.*?>", "");
List<String> lines;
int blocksWidth =3;
int threshold =86;
int start;
int end;
StringBuilder text = new StringBuilder();

Loading…
Cancel
Save