修改SmartContentSelector threshold可定制化 (#1183)

* 修改SmartContentSelector threshold可定制化

* 修改SmartContentSelector threshold可定制化

---------

Co-authored-by: zhaoyiwei <zhaoyiwei@zhongan.com>
pull/1186/head
zyw61483 2 months ago committed by GitHub
parent 8dc417452a
commit 9bb2417f58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -31,6 +31,11 @@ public class HtmlNode extends AbstractSelectable {
return select(smartContentSelector, getSourceTexts()); return select(smartContentSelector, getSourceTexts());
} }
/**
 * Runs a smart-content selector, built with the supplied threshold, over this
 * node's source texts.
 *
 * @param threshold value forwarded unchanged to {@code Selectors.smartContent(int)}
 * @return the result of applying that selector to {@code getSourceTexts()}
 */
public Selectable smartContent(int threshold) {
    return select(Selectors.smartContent(threshold), getSourceTexts());
}
@Override @Override
public Selectable links() { public Selectable links() {
return selectElements(new LinksSelector()); return selectElements(new LinksSelector());

@ -20,6 +20,10 @@ public abstract class Selectors {
return new SmartContentSelector(); return new SmartContentSelector();
} }
/**
 * Creates a {@link SmartContentSelector} that uses a caller-supplied threshold.
 *
 * @param threshold value passed to the {@code SmartContentSelector(int)} constructor
 * @return a new selector instance configured with {@code threshold}
 */
public static SmartContentSelector smartContent(int threshold) {
    final SmartContentSelector customSelector = new SmartContentSelector(threshold);
    return customSelector;
}
public static CssSelector $(String expr) { public static CssSelector $(String expr) {
return new CssSelector(expr); return new CssSelector(expr);
} }

@ -16,9 +16,15 @@ import java.util.List;
@Experimental @Experimental
public class SmartContentSelector implements Selector { public class SmartContentSelector implements Selector {
// Threshold for this selector; starts at 86 unless the int-argument constructor
// assigns a different value.
// NOTE(review): presumably read by select(String) in place of its former
// hard-coded local of the same name — confirm against the full method body.
private int threshold = 86;
public SmartContentSelector() { public SmartContentSelector() {
} }
/**
 * Creates a selector with a caller-chosen threshold, replacing the field's
 * initial value of 86.
 *
 * @param threshold value stored in this selector's {@code threshold} field;
 *                  it is stored as given, with no range validation
 */
public SmartContentSelector(int threshold) {
this.threshold = threshold;
}
@Override @Override
public String select(String html) { public String select(String html) {
html = html.replaceAll("(?is)<!DOCTYPE.*?>", ""); html = html.replaceAll("(?is)<!DOCTYPE.*?>", "");
@ -29,7 +35,6 @@ public class SmartContentSelector implements Selector {
html = html.replaceAll("(?is)<.*?>", ""); html = html.replaceAll("(?is)<.*?>", "");
List<String> lines; List<String> lines;
int blocksWidth =3; int blocksWidth =3;
int threshold =86;
int start; int start;
int end; int end;
StringBuilder text = new StringBuilder(); StringBuilder text = new StringBuilder();

Loading…
Cancel
Save