Upgrade dependencies, including jedis from 2.9.3 to 3.4.1.

pull/1000/head
Sutra Zhou 4 years ago
parent 0d73f08ef6
commit 0e01550a79

@ -73,17 +73,17 @@
<dependency> <dependency>
<groupId>org.apache.httpcomponents</groupId> <groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId> <artifactId>httpcore</artifactId>
<version>4.4.13</version> <version>4.4.14</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
<version>30.0-android</version> <version>30.1-jre</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.jayway.jsonpath</groupId> <groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId> <artifactId>json-path</artifactId>
<version>2.4.0</version> <version>2.5.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
@ -103,7 +103,7 @@
<dependency> <dependency>
<groupId>com.alibaba</groupId> <groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId> <artifactId>fastjson</artifactId>
<version>1.2.69</version> <version>1.2.75</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.github.dreamhead</groupId> <groupId>com.github.dreamhead</groupId>
@ -125,13 +125,13 @@
<dependency> <dependency>
<groupId>org.assertj</groupId> <groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId> <artifactId>assertj-core</artifactId>
<version>3.16.1</version> <version>3.18.1</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId> <artifactId>commons-lang3</artifactId>
<version>3.10</version> <version>3.11</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-collections</groupId> <groupId>commons-collections</groupId>
@ -139,19 +139,19 @@
<version>3.2.2</version> <version>3.2.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-io</groupId> <groupId>commons-io</groupId>
<artifactId>commons-io</artifactId> <artifactId>commons-io</artifactId>
<version>2.7</version> <version>2.8.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.codehaus.groovy</groupId> <groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId> <artifactId>groovy-all</artifactId>
<version>2.4.19</version> <version>3.0.7</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.jruby</groupId> <groupId>org.jruby</groupId>
<artifactId>jruby</artifactId> <artifactId>jruby</artifactId>
<version>9.2.11.1</version> <version>9.2.14.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.jsoup</groupId> <groupId>org.jsoup</groupId>
@ -171,12 +171,12 @@
<dependency> <dependency>
<groupId>net.sf.saxon</groupId> <groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId> <artifactId>Saxon-HE</artifactId>
<version>10.1</version> <version>10.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>net.sourceforge.htmlcleaner</groupId> <groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId> <artifactId>htmlcleaner</artifactId>
<version>2.5</version> <version>2.24</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.github.detro</groupId> <groupId>com.github.detro</groupId>
@ -191,7 +191,7 @@
<dependency> <dependency>
<groupId>redis.clients</groupId> <groupId>redis.clients</groupId>
<artifactId>jedis</artifactId> <artifactId>jedis</artifactId>
<version>2.9.3</version> <version>3.4.1</version>
</dependency> </dependency>
</dependencies> </dependencies>
</dependencyManagement> </dependencyManagement>

@ -1,12 +1,12 @@
package us.codecraft.webmagic.selector; package us.codecraft.webmagic.selector;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/** /**
* Links selector based on jsoup. Use absolute url. <br> * Links selector based on jsoup. Use absolute url. <br>
* *
@ -23,9 +23,9 @@ public class LinksSelector extends BaseElementSelector {
@Override @Override
public List<String> selectList(Element element) { public List<String> selectList(Element element) {
Elements elements = element.select("a"); Elements elements = element.select("a");
List<String> links = new ArrayList<String>(elements.size()); List<String> links = new ArrayList<>(elements.size());
for (Element element0 : elements) { for (Element element0 : elements) {
if (!StringUtil.isBlank(element0.baseUri())) { if (StringUtils.isNotBlank(element0.baseUri())) {
links.add(element0.attr("abs:href")); links.add(element0.attr("abs:href"));
} else { } else {
links.add(element0.attr("href")); links.add(element0.attr("href"));

@ -1,22 +1,23 @@
package us.codecraft.webmagic.scheduler; package us.codecraft.webmagic.scheduler;
import com.alibaba.fastjson.JSON; import java.util.Set;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.alibaba.fastjson.JSON;
import redis.clients.jedis.Jedis; import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool; import redis.clients.jedis.JedisPool;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import java.util.Set;
/** /**
* the redis scheduler with priority * the redis scheduler with priority
* @author sai * @author sai
* Created by sai on 16-5-27. * Created by sai on 16-5-27.
*/ */
public class RedisPriorityScheduler extends RedisScheduler public class RedisPriorityScheduler extends RedisScheduler {
{
private static final String ZSET_PREFIX = "zset_"; private static final String ZSET_PREFIX = "zset_";
@ -37,62 +38,44 @@ public class RedisPriorityScheduler extends RedisScheduler
} }
@Override @Override
protected void pushWhenNoDuplicate(Request request, Task task) protected void pushWhenNoDuplicate(Request request, Task task) {
{ try (Jedis jedis = pool.getResource()) {
Jedis jedis = pool.getResource(); if (request.getPriority() > 0) {
try
{
if(request.getPriority() > 0)
jedis.zadd(getZsetPlusPriorityKey(task), request.getPriority(), request.getUrl()); jedis.zadd(getZsetPlusPriorityKey(task), request.getPriority(), request.getUrl());
else if(request.getPriority() < 0) } else if (request.getPriority() < 0) {
jedis.zadd(getZsetMinusPriorityKey(task), request.getPriority(), request.getUrl()); jedis.zadd(getZsetMinusPriorityKey(task), request.getPriority(), request.getUrl());
else } else {
jedis.lpush(getQueueNoPriorityKey(task), request.getUrl()); jedis.lpush(getQueueNoPriorityKey(task), request.getUrl());
}
setExtrasInItem(jedis, request, task); setExtrasInItem(jedis, request, task);
} }
finally
{
pool.returnResource(jedis);
}
} }
@Override @Override
public synchronized Request poll(Task task) public synchronized Request poll(Task task) {
{ try (Jedis jedis = pool.getResource()) {
Jedis jedis = pool.getResource();
try
{
String url = getRequest(jedis, task); String url = getRequest(jedis, task);
if(StringUtils.isBlank(url)) if (StringUtils.isBlank(url)) {
return null; return null;
}
return getExtrasInItem(jedis, url, task); return getExtrasInItem(jedis, url, task);
} }
finally
{
pool.returnResource(jedis);
}
} }
private String getRequest(Jedis jedis, Task task) private String getRequest(Jedis jedis, Task task) {
{
String url; String url;
Set<String> urls = jedis.zrevrange(getZsetPlusPriorityKey(task), 0, 0); Set<String> urls = jedis.zrevrange(getZsetPlusPriorityKey(task), 0, 0);
if(urls.isEmpty()) if (urls.isEmpty()) {
{
url = jedis.lpop(getQueueNoPriorityKey(task)); url = jedis.lpop(getQueueNoPriorityKey(task));
if(StringUtils.isBlank(url)) if (StringUtils.isBlank(url)) {
{
urls = jedis.zrevrange(getZsetMinusPriorityKey(task), 0, 0); urls = jedis.zrevrange(getZsetMinusPriorityKey(task), 0, 0);
if(!urls.isEmpty()) if (!urls.isEmpty()) {
{
url = urls.toArray(new String[0])[0]; url = urls.toArray(new String[0])[0];
jedis.zrem(getZsetMinusPriorityKey(task), url); jedis.zrem(getZsetMinusPriorityKey(task), url);
} }
} }
} } else {
else
{
url = urls.toArray(new String[0])[0]; url = urls.toArray(new String[0])[0];
jedis.zrem(getZsetPlusPriorityKey(task), url); jedis.zrem(getZsetPlusPriorityKey(task), url);
} }
@ -100,51 +83,39 @@ public class RedisPriorityScheduler extends RedisScheduler
} }
@Override @Override
public void resetDuplicateCheck(Task task) public void resetDuplicateCheck(Task task) {
{ try (Jedis jedis = pool.getResource()) {
Jedis jedis = pool.getResource();
try
{
jedis.del(getSetKey(task)); jedis.del(getSetKey(task));
} }
finally
{
pool.returnResource(jedis);
}
} }
private String getZsetPlusPriorityKey(Task task) private String getZsetPlusPriorityKey(Task task) {
{
return ZSET_PREFIX + task.getUUID() + PLUS_PRIORITY_SUFFIX; return ZSET_PREFIX + task.getUUID() + PLUS_PRIORITY_SUFFIX;
} }
private String getQueueNoPriorityKey(Task task) private String getQueueNoPriorityKey(Task task) {
{
return QUEUE_PREFIX + task.getUUID() + NO_PRIORITY_SUFFIX; return QUEUE_PREFIX + task.getUUID() + NO_PRIORITY_SUFFIX;
} }
private String getZsetMinusPriorityKey(Task task) private String getZsetMinusPriorityKey(Task task) {
{
return ZSET_PREFIX + task.getUUID() + MINUS_PRIORITY_SUFFIX; return ZSET_PREFIX + task.getUUID() + MINUS_PRIORITY_SUFFIX;
} }
private void setExtrasInItem(Jedis jedis,Request request, Task task) private void setExtrasInItem(Jedis jedis,Request request, Task task) {
{ if (request.getExtras() != null) {
if(request.getExtras() != null) String field = DigestUtils.sha1Hex(request.getUrl());
{
String field = DigestUtils.shaHex(request.getUrl());
String value = JSON.toJSONString(request); String value = JSON.toJSONString(request);
jedis.hset(getItemKey(task), field, value); jedis.hset(getItemKey(task), field, value);
} }
} }
private Request getExtrasInItem(Jedis jedis, String url, Task task) private Request getExtrasInItem(Jedis jedis, String url, Task task) {
{
String key = getItemKey(task); String key = getItemKey(task);
String field = DigestUtils.shaHex(url); String field = DigestUtils.sha1Hex(url);
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes()); byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
if(bytes != null) if (bytes != null) {
return JSON.parseObject(new String(bytes), Request.class); return JSON.parseObject(new String(bytes), Request.class);
}
return new Request(url); return new Request(url);
} }
} }

@ -1,8 +1,10 @@
package us.codecraft.webmagic.scheduler; package us.codecraft.webmagic.scheduler;
import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.alibaba.fastjson.JSON;
import redis.clients.jedis.Jedis; import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool; import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig; import redis.clients.jedis.JedisPoolConfig;
@ -37,21 +39,15 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override @Override
public void resetDuplicateCheck(Task task) { public void resetDuplicateCheck(Task task) {
Jedis jedis = pool.getResource(); try (Jedis jedis = pool.getResource()) {
try {
jedis.del(getSetKey(task)); jedis.del(getSetKey(task));
} finally {
pool.returnResource(jedis);
} }
} }
@Override @Override
public boolean isDuplicate(Request request, Task task) { public boolean isDuplicate(Request request, Task task) {
Jedis jedis = pool.getResource(); try (Jedis jedis = pool.getResource()) {
try {
return jedis.sadd(getSetKey(task), request.getUrl()) == 0; return jedis.sadd(getSetKey(task), request.getUrl()) == 0;
} finally {
pool.returnResource(jedis);
} }
} }
@ -62,7 +58,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
try { try {
jedis.rpush(getQueueKey(task), request.getUrl()); jedis.rpush(getQueueKey(task), request.getUrl());
if (checkForAdditionalInfo(request)) { if (checkForAdditionalInfo(request)) {
String field = DigestUtils.shaHex(request.getUrl()); String field = DigestUtils.sha1Hex(request.getUrl());
String value = JSON.toJSONString(request); String value = JSON.toJSONString(request);
jedis.hset((ITEM_PREFIX + task.getUUID()), field, value); jedis.hset((ITEM_PREFIX + task.getUUID()), field, value);
} }
@ -100,14 +96,13 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override @Override
public synchronized Request poll(Task task) { public synchronized Request poll(Task task) {
Jedis jedis = pool.getResource(); try (Jedis jedis = pool.getResource()) {
try {
String url = jedis.lpop(getQueueKey(task)); String url = jedis.lpop(getQueueKey(task));
if (url == null) { if (url == null) {
return null; return null;
} }
String key = ITEM_PREFIX + task.getUUID(); String key = ITEM_PREFIX + task.getUUID();
String field = DigestUtils.shaHex(url); String field = DigestUtils.sha1Hex(url);
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes()); byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
if (bytes != null) { if (bytes != null) {
Request o = JSON.parseObject(new String(bytes), Request.class); Request o = JSON.parseObject(new String(bytes), Request.class);
@ -115,8 +110,6 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
} }
Request request = new Request(url); Request request = new Request(url);
return request; return request;
} finally {
pool.returnResource(jedis);
} }
} }
@ -134,23 +127,17 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override @Override
public int getLeftRequestsCount(Task task) { public int getLeftRequestsCount(Task task) {
Jedis jedis = pool.getResource(); try (Jedis jedis = pool.getResource()) {
try {
Long size = jedis.llen(getQueueKey(task)); Long size = jedis.llen(getQueueKey(task));
return size.intValue(); return size.intValue();
} finally {
pool.returnResource(jedis);
} }
} }
@Override @Override
public int getTotalRequestsCount(Task task) { public int getTotalRequestsCount(Task task) {
Jedis jedis = pool.getResource(); try (Jedis jedis = pool.getResource()) {
try {
Long size = jedis.scard(getSetKey(task)); Long size = jedis.scard(getSetKey(task));
return size.intValue(); return size.intValue();
} finally {
pool.returnResource(jedis);
} }
} }
} }

Loading…
Cancel
Save