diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
index 9be7adb5..6b7ebae6 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
@@ -6,6 +6,7 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
+import us.codecraft.webmagic.utils.HttpConstant;
/**
* Remove duplicate urls and only push urls which are not duplicate.
@@ -31,7 +32,7 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
@Override
public void push(Request request, Task task) {
logger.trace("get a candidate url {}", request.getUrl());
- if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request)) {
+ if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request) || noNeedToRemoveDuplicate(request)) {
logger.debug("push to queue {}", request.getUrl());
pushWhenNoDuplicate(request, task);
}
@@ -41,6 +42,10 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
return request.getExtra(Request.CYCLE_TRIED_TIMES) != null;
}
+ protected boolean noNeedToRemoveDuplicate(Request request) {
+ return HttpConstant.Method.POST.equalsIgnoreCase(request.getMethod());
+ }
+
protected void pushWhenNoDuplicate(Request request, Task task) {
}