Upgrade dependencies, including jedis from 2.9.3 to 3.4.1.

pull/1000/head
Sutra Zhou 4 years ago
parent 0d73f08ef6
commit 0e01550a79

@ -73,17 +73,17 @@
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.13</version>
<version>4.4.14</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.0-android</version>
<version>30.1-jre</version>
</dependency>
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
<version>2.4.0</version>
<version>2.5.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@ -103,7 +103,7 @@
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.69</version>
<version>1.2.75</version>
</dependency>
<dependency>
<groupId>com.github.dreamhead</groupId>
@ -125,13 +125,13 @@
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>3.16.1</version>
<version>3.18.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.10</version>
<version>3.11</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
@ -141,17 +141,17 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.7</version>
<version>2.8.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
<version>2.4.19</version>
<version>3.0.7</version>
</dependency>
<dependency>
<groupId>org.jruby</groupId>
<artifactId>jruby</artifactId>
<version>9.2.11.1</version>
<version>9.2.14.0</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
@ -171,12 +171,12 @@
<dependency>
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
<version>10.1</version>
<version>10.3</version>
</dependency>
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>2.5</version>
<version>2.24</version>
</dependency>
<dependency>
<groupId>com.github.detro</groupId>
@ -191,7 +191,7 @@
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.9.3</version>
<version>3.4.1</version>
</dependency>
</dependencies>
</dependencyManagement>

@ -1,12 +1,12 @@
package us.codecraft.webmagic.selector;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
* Links selector based on jsoup. Returns absolute URLs when the element has a base URI; otherwise returns the raw href. <br>
*
@ -23,9 +23,9 @@ public class LinksSelector extends BaseElementSelector {
@Override
public List<String> selectList(Element element) {
Elements elements = element.select("a");
List<String> links = new ArrayList<String>(elements.size());
List<String> links = new ArrayList<>(elements.size());
for (Element element0 : elements) {
if (!StringUtil.isBlank(element0.baseUri())) {
if (StringUtils.isNotBlank(element0.baseUri())) {
links.add(element0.attr("abs:href"));
} else {
links.add(element0.attr("href"));

@ -1,22 +1,23 @@
package us.codecraft.webmagic.scheduler;
import com.alibaba.fastjson.JSON;
import java.util.Set;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import com.alibaba.fastjson.JSON;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import java.util.Set;
/**
* The Redis scheduler with priority support.
* @author sai
* Created by sai on 16-5-27.
*/
public class RedisPriorityScheduler extends RedisScheduler
{
public class RedisPriorityScheduler extends RedisScheduler {
private static final String ZSET_PREFIX = "zset_";
@ -37,62 +38,44 @@ public class RedisPriorityScheduler extends RedisScheduler
}
@Override
protected void pushWhenNoDuplicate(Request request, Task task)
{
Jedis jedis = pool.getResource();
try
{
if(request.getPriority() > 0)
protected void pushWhenNoDuplicate(Request request, Task task) {
try (Jedis jedis = pool.getResource()) {
if (request.getPriority() > 0) {
jedis.zadd(getZsetPlusPriorityKey(task), request.getPriority(), request.getUrl());
else if(request.getPriority() < 0)
} else if (request.getPriority() < 0) {
jedis.zadd(getZsetMinusPriorityKey(task), request.getPriority(), request.getUrl());
else
} else {
jedis.lpush(getQueueNoPriorityKey(task), request.getUrl());
}
setExtrasInItem(jedis, request, task);
}
finally
{
pool.returnResource(jedis);
}
}
@Override
public synchronized Request poll(Task task)
{
Jedis jedis = pool.getResource();
try
{
public synchronized Request poll(Task task) {
try (Jedis jedis = pool.getResource()) {
String url = getRequest(jedis, task);
if(StringUtils.isBlank(url))
if (StringUtils.isBlank(url)) {
return null;
return getExtrasInItem(jedis, url, task);
}
finally
{
pool.returnResource(jedis);
return getExtrasInItem(jedis, url, task);
}
}
private String getRequest(Jedis jedis, Task task)
{
private String getRequest(Jedis jedis, Task task) {
String url;
Set<String> urls = jedis.zrevrange(getZsetPlusPriorityKey(task), 0, 0);
if(urls.isEmpty())
{
if (urls.isEmpty()) {
url = jedis.lpop(getQueueNoPriorityKey(task));
if(StringUtils.isBlank(url))
{
if (StringUtils.isBlank(url)) {
urls = jedis.zrevrange(getZsetMinusPriorityKey(task), 0, 0);
if(!urls.isEmpty())
{
if (!urls.isEmpty()) {
url = urls.toArray(new String[0])[0];
jedis.zrem(getZsetMinusPriorityKey(task), url);
}
}
}
else
{
} else {
url = urls.toArray(new String[0])[0];
jedis.zrem(getZsetPlusPriorityKey(task), url);
}
@ -100,51 +83,39 @@ public class RedisPriorityScheduler extends RedisScheduler
}
@Override
public void resetDuplicateCheck(Task task)
{
Jedis jedis = pool.getResource();
try
{
public void resetDuplicateCheck(Task task) {
try (Jedis jedis = pool.getResource()) {
jedis.del(getSetKey(task));
}
finally
{
pool.returnResource(jedis);
}
}
private String getZsetPlusPriorityKey(Task task)
{
private String getZsetPlusPriorityKey(Task task) {
return ZSET_PREFIX + task.getUUID() + PLUS_PRIORITY_SUFFIX;
}
private String getQueueNoPriorityKey(Task task)
{
private String getQueueNoPriorityKey(Task task) {
return QUEUE_PREFIX + task.getUUID() + NO_PRIORITY_SUFFIX;
}
private String getZsetMinusPriorityKey(Task task)
{
private String getZsetMinusPriorityKey(Task task) {
return ZSET_PREFIX + task.getUUID() + MINUS_PRIORITY_SUFFIX;
}
private void setExtrasInItem(Jedis jedis,Request request, Task task)
{
if(request.getExtras() != null)
{
String field = DigestUtils.shaHex(request.getUrl());
private void setExtrasInItem(Jedis jedis,Request request, Task task) {
if (request.getExtras() != null) {
String field = DigestUtils.sha1Hex(request.getUrl());
String value = JSON.toJSONString(request);
jedis.hset(getItemKey(task), field, value);
}
}
private Request getExtrasInItem(Jedis jedis, String url, Task task)
{
private Request getExtrasInItem(Jedis jedis, String url, Task task) {
String key = getItemKey(task);
String field = DigestUtils.shaHex(url);
String field = DigestUtils.sha1Hex(url);
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
if(bytes != null)
if (bytes != null) {
return JSON.parseObject(new String(bytes), Request.class);
}
return new Request(url);
}
}

@ -1,8 +1,10 @@
package us.codecraft.webmagic.scheduler;
import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import com.alibaba.fastjson.JSON;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
@ -37,21 +39,15 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override
public void resetDuplicateCheck(Task task) {
Jedis jedis = pool.getResource();
try {
try (Jedis jedis = pool.getResource()) {
jedis.del(getSetKey(task));
} finally {
pool.returnResource(jedis);
}
}
@Override
public boolean isDuplicate(Request request, Task task) {
Jedis jedis = pool.getResource();
try {
try (Jedis jedis = pool.getResource()) {
return jedis.sadd(getSetKey(task), request.getUrl()) == 0;
} finally {
pool.returnResource(jedis);
}
}
@ -62,7 +58,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
try {
jedis.rpush(getQueueKey(task), request.getUrl());
if (checkForAdditionalInfo(request)) {
String field = DigestUtils.shaHex(request.getUrl());
String field = DigestUtils.sha1Hex(request.getUrl());
String value = JSON.toJSONString(request);
jedis.hset((ITEM_PREFIX + task.getUUID()), field, value);
}
@ -100,14 +96,13 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override
public synchronized Request poll(Task task) {
Jedis jedis = pool.getResource();
try {
try (Jedis jedis = pool.getResource()) {
String url = jedis.lpop(getQueueKey(task));
if (url == null) {
return null;
}
String key = ITEM_PREFIX + task.getUUID();
String field = DigestUtils.shaHex(url);
String field = DigestUtils.sha1Hex(url);
byte[] bytes = jedis.hget(key.getBytes(), field.getBytes());
if (bytes != null) {
Request o = JSON.parseObject(new String(bytes), Request.class);
@ -115,8 +110,6 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
}
Request request = new Request(url);
return request;
} finally {
pool.returnResource(jedis);
}
}
@ -134,23 +127,17 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
@Override
public int getLeftRequestsCount(Task task) {
Jedis jedis = pool.getResource();
try {
try (Jedis jedis = pool.getResource()) {
Long size = jedis.llen(getQueueKey(task));
return size.intValue();
} finally {
pool.returnResource(jedis);
}
}
@Override
public int getTotalRequestsCount(Task task) {
Jedis jedis = pool.getResource();
try {
try (Jedis jedis = pool.getResource()) {
Long size = jedis.scard(getSetKey(task));
return size.intValue();
} finally {
pool.returnResource(jedis);
}
}
}

Loading…
Cancel
Save