Skip to content

Commit 74bcc72

Browse files
committed
Merge branch 'develop'
测试develop分支merge到master分支上
2 parents 29ab5b7 + 6e9799e commit 74bcc72

33 files changed

+38921
-4050
lines changed

.classpath

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,5 +23,6 @@
2323
</attributes>
2424
</classpathentry>
2525
<classpathentry kind="lib" path="lib/org.json-20120521.jar"/>
26+
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
2627
<classpathentry kind="output" path="target/classes"/>
2728
</classpath>

log/output.log

Lines changed: 5025 additions & 0 deletions
Large diffs are not rendered by default.

log/output.log.1

Lines changed: 15433 additions & 0 deletions
Large diffs are not rendered by default.

log/output.log.2

Lines changed: 17006 additions & 0 deletions
Large diffs are not rendered by default.

output.log

Lines changed: 0 additions & 4014 deletions
This file was deleted.

src/main/java/MultiSpider.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
public class MultiSpider {
1414

15-
private int threadNum = 1;
15+
private int threadNum = 10;
1616
private ExecutorService service;
1717
private String username;
1818
private String password;
@@ -53,6 +53,6 @@ public MultiSpider thread(int num) {
5353

5454
public static void main(String[] args) {
5555
MultiSpider spider = new MultiSpider();
56-
spider.create("tanghaodong25@163.com", "thd04180015").run();;
56+
spider.create("tanghaodong25@163.com", "***").run();
5757
}
5858
}

src/main/java/downloader/DownloaderTest.java

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
package downloader;
22

3+
import java.io.BufferedReader;
34
import java.io.IOException;
5+
import java.io.InputStream;
6+
import java.io.InputStreamReader;
47
import java.util.ArrayList;
58
import java.util.HashMap;
69
import java.util.Iterator;
@@ -24,6 +27,10 @@
2427
import org.apache.http.message.BasicNameValuePair;
2528
import org.apache.http.util.EntityUtils;
2629

30+
import selector.XpathSelector;
31+
32+
33+
2734
public class DownloaderTest {
2835
private List<String> ProblemDescriptionPath = new ArrayList<String>();
2936
private List<String> ProblemSubmissionPath = new ArrayList<String>();
@@ -60,6 +67,9 @@ public static void main(String[] args) throws ClientProtocolException, IOExcepti
6067
HttpGet httpGet1 = new HttpGet("https://leetcode.com/problems/rotate-array/submissions/");
6168
response1 = httpClient.execute(httpGet1);
6269
printResponse(response1);
70+
XpathSelector xpathSelector = new XpathSelector("//div[@class='row']/div/div/a/@href");
71+
List<String> listTmp = xpathSelector.selectList(getHtml(response1));
72+
System.out.println(listTmp.size());
6373
entity1 = response1.getEntity();
6474
EntityUtils.consume(entity1);
6575
}
@@ -105,4 +115,29 @@ public static String getCookie(HttpResponse httpResponse) {
105115
return null;
106116
}
107117

118+
public static String getHtml(HttpResponse response) {
119+
BufferedReader br = null;
120+
try {
121+
InputStream responseBody = response.getEntity().getContent();
122+
StringBuilder stringBuilder = new StringBuilder();
123+
br = new BufferedReader(new InputStreamReader(
124+
responseBody));
125+
String line = null;
126+
while ((line = br.readLine()) != null) {
127+
stringBuilder.append("\n" + line);
128+
}
129+
return stringBuilder.toString();
130+
} catch (IllegalStateException e) {
131+
System.out.println(e.getMessage());
132+
} catch (IOException e) {
133+
System.out.println(e.getMessage());
134+
} finally {
135+
try {
136+
br.close();
137+
} catch (IOException e) {
138+
System.out.println(e.getMessage());
139+
}
140+
}
141+
return null;
142+
}
108143
}

src/main/java/downloader/HttpClientDownloader.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
import java.util.Map.Entry;
1414

1515
import org.apache.http.Header;
16-
import org.apache.http.HeaderIterator;
1716
import org.apache.http.HttpEntity;
1817
import org.apache.http.HttpResponse;
1918
import org.apache.http.NameValuePair;
@@ -30,7 +29,8 @@
3029
import org.apache.log4j.Logger;
3130
import org.apache.log4j.PropertyConfigurator;
3231

33-
import us.codecraft.webmagic.selector.XpathSelector;
32+
import selector.XpathSelector;
33+
3434

3535
public class HttpClientDownloader implements Downloader {
3636
private static final String ALGORITHMS = "https://www.leetcode.com/problemset/algorithms/";
@@ -163,9 +163,7 @@ public List<String> doDispatcher(String url, String pattern) {
163163
HttpEntity entity1 = response1.getEntity();
164164
EntityUtils.consume(entity1);
165165
return listTmp;
166-
} catch (ClientProtocolException e) {
167-
System.out.println(e.getMessage());
168-
} catch (IOException e) {
166+
} catch (Exception e) {
169167
System.out.println(e.getMessage());
170168
}
171169
return null;
@@ -199,9 +197,6 @@ public static void printResponse(HttpResponse httpResponse)
199197
// System.out.println("response content:"
200198
// + responseString.replace("\r\n", ""));
201199
// }
202-
PropertyConfigurator.configure("test.log");
203-
Logger m_log = Logger.getLogger(HttpClientDownloader.class);
204-
m_log.debug("hello world");
205200
}
206201

207202
/**

src/main/java/scheduler/DispatchScheduler.java

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@ public class DispatchScheduler implements Scheduler {
1616
private static final String getProblemLinkPath = "//table[@class='table table-striped table-centered']/tbody/tr/td/a/text()"; //得到题目名称Xpath路径
1717
private static final String submissionLinkPath = "//div[@class='row']/div/div/a/@href"; //进入题目提交页面Xpath路径
1818
private static final String codePagePath = "//table[@id='result_testcases']/tbody/tr/td/a/@href"; //进入题目代码页面Xpath路径
19-
private static final String codePageStatusPath = "//table[@id='result_testcases]/t"
20-
+ "body/tr/td/a[@class='status-accepted text-success']/strong/text()"; //题目提交状态Xpath路径
19+
// private static final String codePageStatusPath = "//table[@id='result_testcases]/t"
20+
// + "body/tr/td/a[@class='status-accepted text-success']/strong/text()"; //题目提交状态Xpath路径
21+
// private static final String codePageStatusPath = "//div[@class='row']/div/div/a/@href";
22+
private static final String codePageStatusPath = "//a[@class='text-danger status-accepted']/@href";
2123
private Logger myLog;
2224
/**
2325
*
@@ -35,7 +37,7 @@ public void startProcess() {
3537
try {
3638
myLog.debug("进入startProcess,试图初始化task queue");
3739
List<String> list = downloader.problemListDownloader(problemLinkPath);
38-
setTask(list);
40+
setTask(list, null);
3941
} catch (InterruptedException e) {
4042
e.printStackTrace();
4143
}
@@ -77,19 +79,16 @@ public void taskProcceed() {
7779
* @since 1.0.0
7880
*/
7981
public void getDownloadResult(ParserTask task) throws InterruptedException {
80-
if (task.getType() == ParserTask.TaskType.GET_SUBMISION_URL) {
81-
myLog.debug("正在获取问题提交列表url");
82-
List<String> list = downloader.problemDescriptionDownloader(task.getUrl(), submissionLinkPath);
83-
setTask(list);
84-
} else if (task.getType() == ParserTask.TaskType.GET_CODE_URL) {
85-
myLog.debug("正在获取代码页面url");
86-
List<String> list = downloader.submissionListDownloader(task.getUrl(), codePagePath);
87-
setTask(list);
88-
} else if (task.getType() == ParserTask.TaskType.GET_CODE) {
89-
myLog.debug("正在获取代码");
90-
//do something to get code
82+
if (task.getType().equals(ParserTask.TaskType.SUBMISION_URL)) {
83+
myLog.debug("该任务为获取问题代码");
84+
List<String> list = downloader.problemDescriptionDownloader(task.getUrl(), codePageStatusPath);
85+
setTask(list, task.getType());
86+
} else if (task.getType().equals(ParserTask.TaskType.CODE_PAGE_URL)) {
87+
//获取代码
9188
} else {
92-
System.out.println("无法识别该url,请填写正确的url。");
89+
myLog.debug("正在获取问题提交列表,url为: " + task.getUrl());
90+
List<String> list = downloader.problemDescriptionDownloader(task.getUrl(), submissionLinkPath);
91+
setTask(list, task.getType());
9392
}
9493
}
9594

@@ -102,10 +101,18 @@ public void getDownloadResult(ParserTask task) throws InterruptedException {
102101
* @exception
103102
* @since 1.0.0
104103
*/
105-
public void setTask(List<String> list) throws InterruptedException {
104+
public void setTask(List<String> list, ParserTask.TaskType type) throws InterruptedException {
105+
if (list.size() == 0) return;
106+
if (type != null && type.equals(ParserTask.TaskType.SUBMISION_URL)) {
107+
ParserTask task = new ParserTask(new StringBuilder("https://leetcode.com").append(list.get(0)).toString());
108+
myLog.debug("正在进行入栈操作,task的url为: " + task.getUrl());
109+
task.isType();
110+
queue.put(task);
111+
return;
112+
}
106113
for (String value: list) {
107114
ParserTask task = new ParserTask(new StringBuilder("https://leetcode.com").append(value).toString());
108-
myLog.debug("task的url为: " + task.getUrl());
115+
myLog.debug("正在进行入栈操作,task的url为: " + task.getUrl());
109116
task.isType();
110117
queue.put(task);
111118
}

src/main/java/scheduler/ParserTask.java

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ public class ParserTask implements Comparable<ParserTask>{
1010
private static final String submissionListURL = ".*/submissions/$"; // 检验是否是题目提交列表页面
1111
private static final String problemCodeURL = ".*/submissions/detail/.*"; // 检验是否是题目代码页面
1212
private static final String problemListURL = ".*/problemset/algorithms/$"; // 检验是否是题目列表页面
13+
private static final String problemDescription = ".*/problems/.*";
1314
private Logger myLog;
1415

1516
/**
@@ -21,7 +22,7 @@ public class ParserTask implements Comparable<ParserTask>{
2122
*
2223
*/
2324
static enum TaskType {
24-
GET_PROBLEM_LIST_URL, GET_SUBMISION_URL, GET_CODE_URL, GET_CODE
25+
PROBLEM_URL, SUBMISION_URL, CODE_PAGE_URL
2526
}
2627

2728
private String url;
@@ -71,14 +72,15 @@ public TaskType getType() {
7172
* @since 1.0.0
7273
*/
7374
public void isType() {
74-
if (checkPattern(url, problemListURL)) {
75-
type = TaskType.GET_PROBLEM_LIST_URL;
75+
if (checkPattern(url, submissionListURL)) {
76+
myLog.debug("设定task类型为SUBMISION_URL");
77+
type = TaskType.SUBMISION_URL;
7678
} else if (checkPattern(url, problemCodeURL)) {
77-
type = TaskType.GET_CODE_URL;
78-
} else if (checkPattern(url, submissionListURL)) {
79-
type = TaskType.GET_SUBMISION_URL;
79+
myLog.debug("设定task类型为CODE_PAGE_URL");
80+
type = TaskType.CODE_PAGE_URL;
8081
} else {
81-
type = TaskType.GET_CODE;
82+
myLog.debug("设定task类型为PROBLEM_URL");
83+
type = TaskType.PROBLEM_URL;
8284
}
8385
}
8486

@@ -103,6 +105,6 @@ public boolean checkPattern(String url, String patternString) {
103105
* 复写Comparable接口方法
104106
*/
105107
public int compareTo(ParserTask o) {
106-
return o.type.ordinal()-this.type.ordinal();
108+
return this.type.ordinal()-o.type.ordinal();
107109
}
108110
}

0 commit comments

Comments
 (0)