|
|
@ -16,9 +16,15 @@ import java.util.List;
|
|
|
|
@Experimental
|
|
|
|
@Experimental
|
|
|
|
public class SmartContentSelector implements Selector {
|
|
|
|
public class SmartContentSelector implements Selector {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private int threshold = 86;
|
|
|
|
|
|
|
|
|
|
|
|
public SmartContentSelector() {
|
|
|
|
public SmartContentSelector() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public SmartContentSelector(int threshold) {
|
|
|
|
|
|
|
|
this.threshold = threshold;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
@Override
|
|
|
|
public String select(String html) {
|
|
|
|
public String select(String html) {
|
|
|
|
html = html.replaceAll("(?is)<!DOCTYPE.*?>", "");
|
|
|
|
html = html.replaceAll("(?is)<!DOCTYPE.*?>", "");
|
|
|
@ -29,7 +35,6 @@ public class SmartContentSelector implements Selector {
|
|
|
|
html = html.replaceAll("(?is)<.*?>", "");
|
|
|
|
html = html.replaceAll("(?is)<.*?>", "");
|
|
|
|
List<String> lines;
|
|
|
|
List<String> lines;
|
|
|
|
int blocksWidth =3;
|
|
|
|
int blocksWidth =3;
|
|
|
|
int threshold =86;
|
|
|
|
|
|
|
|
int start;
|
|
|
|
int start;
|
|
|
|
int end;
|
|
|
|
int end;
|
|
|
|
StringBuilder text = new StringBuilder();
|
|
|
|
StringBuilder text = new StringBuilder();
|
|
|
|