Add an extension point for getUrl #118

pull/121/head
yihua.huang 11 years ago
parent ec1c2e8cbc
commit 01aec7e1ab

@ -46,14 +46,18 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
@Override @Override
public boolean isDuplicate(Request request, Task task) { public boolean isDuplicate(Request request, Task task) {
boolean isDuplicate = bloomFilter.mightContain(request.getUrl()); boolean isDuplicate = bloomFilter.mightContain(getUrl(request));
if (!isDuplicate) { if (!isDuplicate) {
bloomFilter.put(request.getUrl()); bloomFilter.put(getUrl(request));
counter.incrementAndGet(); counter.incrementAndGet();
} }
return isDuplicate; return isDuplicate;
} }
/**
 * Returns the string that {@code isDuplicate} checks against, and inserts into,
 * the bloom filter for the given request.
 *
 * <p>This implementation simply returns {@code request.getUrl()}. The method is
 * {@code protected} so a subclass can override it to supply a different key.
 *
 * @param request the request being checked for duplication
 * @return the value of {@code request.getUrl()}
 */
protected String getUrl(Request request) {
return request.getUrl();
}
@Override @Override
public void resetDuplicateCheck(Task task) { public void resetDuplicateCheck(Task task) {
rebuildBloomFilter(); rebuildBloomFilter();

@ -16,7 +16,11 @@ public class HashSetDuplicateRemover implements DuplicateRemover {
@Override @Override
public boolean isDuplicate(Request request, Task task) { public boolean isDuplicate(Request request, Task task) {
return !urls.add(request.getUrl()); return !urls.add(getUrl(request));
}
/**
 * Returns the string that {@code isDuplicate} adds to the {@code urls} set
 * for the given request.
 *
 * <p>This implementation simply returns {@code request.getUrl()}. The method is
 * {@code protected} so a subclass can override it to supply a different key.
 *
 * @param request the request being checked for duplication
 * @return the value of {@code request.getUrl()}
 */
protected String getUrl(Request request) {
return request.getUrl();
} }
@Override @Override

Loading…
Cancel
Save