Skip to content

Commit a398338

Browse files
committed
update
1 parent 0a2556d commit a398338

File tree

14 files changed

+1773
-137
lines changed

14 files changed

+1773
-137
lines changed

pom.xml

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,41 @@
11
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2-
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3-
<modelVersion>4.0.0</modelVersion>
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
44

5-
<groupId>haodong.net.cn</groupId>
6-
<artifactId>leetcode2github</artifactId>
7-
<version>0.0.1-SNAPSHOT</version>
8-
<packaging>jar</packaging>
5+
<groupId>haodong.net.cn</groupId>
6+
<artifactId>leetcode2github</artifactId>
7+
<version>0.0.1-SNAPSHOT</version>
8+
<packaging>jar</packaging>
99

10-
<name>leetcode2github</name>
11-
<url>http://maven.apache.org</url>
10+
<name>leetcode2github</name>
11+
<url>http://maven.apache.org</url>
1212

13-
<properties>
14-
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15-
</properties>
13+
<properties>
14+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15+
</properties>
1616

17-
<dependencies>
18-
<dependency>
19-
<groupId>junit</groupId>
20-
<artifactId>junit</artifactId>
21-
<version>3.8.1</version>
22-
<scope>test</scope>
23-
</dependency>
24-
<!-- webmagic模块 -->
25-
<dependency>
26-
<groupId>us.codecraft</groupId>
27-
<artifactId>webmagic-core</artifactId>
28-
<version>0.4.2</version>
29-
</dependency>
30-
<dependency>
31-
<groupId>us.codecraft</groupId>
32-
<artifactId>webmagic-extension</artifactId>
33-
<version>0.4.2</version>
34-
</dependency>
35-
</dependencies>
17+
<dependencies>
18+
<dependency>
19+
<groupId>junit</groupId>
20+
<artifactId>junit</artifactId>
21+
<version>3.8.1</version>
22+
<scope>test</scope>
23+
</dependency>
24+
<!-- webmagic模块 -->
25+
<dependency>
26+
<groupId>us.codecraft</groupId>
27+
<artifactId>webmagic-core</artifactId>
28+
<version>0.4.2</version>
29+
</dependency>
30+
<dependency>
31+
<groupId>us.codecraft</groupId>
32+
<artifactId>webmagic-extension</artifactId>
33+
<version>0.4.2</version>
34+
</dependency>
35+
<dependency>
36+
<groupId>org.apache.httpcomponents</groupId>
37+
<artifactId>httpclient</artifactId>
38+
<version>4.4</version>
39+
</dependency>
40+
</dependencies>
3641
</project>

src/main/java/MultiSpider.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
3+
import org.apache.http.impl.client.CloseableHttpClient;
4+
5+
public class MultiSpider implements Runnable {
6+
private CloseableHttpClient httpClient;
7+
public MultiSpider(CloseableHttpClient httpClient) {
8+
this.httpClient = httpClient;
9+
}
10+
public void run() {
11+
12+
}
13+
14+
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package console;
2+
3+
import java.io.BufferedReader;
4+
import java.io.IOException;
5+
import java.io.InputStream;
6+
import java.io.InputStreamReader;
7+
import java.util.ArrayList;
8+
import java.util.HashMap;
9+
import java.util.Iterator;
10+
import java.util.List;
11+
import java.util.Map;
12+
import java.util.Map.Entry;
13+
14+
import org.apache.http.Header;
15+
import org.apache.http.HeaderIterator;
16+
import org.apache.http.HttpEntity;
17+
import org.apache.http.HttpResponse;
18+
import org.apache.http.NameValuePair;
19+
import org.apache.http.ParseException;
20+
import org.apache.http.client.ClientProtocolException;
21+
import org.apache.http.client.entity.UrlEncodedFormEntity;
22+
import org.apache.http.client.methods.CloseableHttpResponse;
23+
import org.apache.http.client.methods.HttpGet;
24+
import org.apache.http.client.methods.HttpPost;
25+
import org.apache.http.impl.client.CloseableHttpClient;
26+
import org.apache.http.impl.client.HttpClients;
27+
import org.apache.http.message.BasicNameValuePair;
28+
import org.apache.http.util.EntityUtils;
29+
30+
import us.codecraft.webmagic.selector.XpathSelector;
31+
32+
public class HttpClientExample {
33+
private List<String> ProblemDescriptionPath = new ArrayList<String>();
34+
private List<String> ProblemSubmissionPath = new ArrayList<String>();
35+
private Map<String, List<String>> problemCodePath = new HashMap<String, List<String>>();
36+
37+
public static void main(String[] args) throws ClientProtocolException,
38+
IOException {
39+
CloseableHttpClient httpClient = HttpClients.createDefault();
40+
HttpGet httpGet = new HttpGet("https://leetcode.com");
41+
CloseableHttpResponse response1 = httpClient.execute(httpGet);
42+
// printResponse(response1);
43+
String cookieString = getCookie(response1);
44+
HttpEntity entity1 = response1.getEntity();
45+
EntityUtils.consume(entity1);
46+
47+
HttpPost httpPost = new HttpPost("https://leetcode.com/accounts/login/");
48+
httpPost.addHeader(
49+
"User-Agent",
50+
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6)Gecko/20091201 Firefox/3.5.6");
51+
httpPost.addHeader("Referer", "https://leetcode.com/accounts/login/");
52+
httpPost.addHeader("Origin", "https://leetcode.com");
53+
Map<String, String> map = new HashMap<String, String>();
54+
map.put("login", "tanghaodong25@163.com");
55+
map.put("password", "thd04180015");
56+
map.put("csrfmiddlewaretoken", cookieString);
57+
map.put("remember", "on");
58+
httpPost.setEntity(new UrlEncodedFormEntity(getParam(map), "UTF-8"));
59+
response1 = httpClient.execute(httpPost);
60+
HeaderIterator iterator = response1.headerIterator();
61+
// while (iterator.hasNext()) {
62+
// iterator.next();
63+
// }
64+
// printResponse(response1);
65+
entity1 = response1.getEntity();
66+
EntityUtils.consume(entity1);
67+
68+
HttpGet httpGet1 = new HttpGet(
69+
"https://leetcode.com/problems/rotate-array/submissions/");
70+
response1 = httpClient.execute(httpGet1);
71+
printResponse(response1);
72+
entity1 = response1.getEntity();
73+
EntityUtils.consume(entity1);
74+
}
75+
76+
public static List<NameValuePair> getParam(Map parameterMap) {
77+
List<NameValuePair> param = new ArrayList<NameValuePair>();
78+
Iterator it = parameterMap.entrySet().iterator();
79+
while (it.hasNext()) {
80+
Entry parmEntry = (Entry) it.next();
81+
param.add(new BasicNameValuePair((String) parmEntry.getKey(),
82+
(String) parmEntry.getValue()));
83+
}
84+
return param;
85+
}
86+
@SuppressWarnings("unused")
87+
public static void printResponse(HttpResponse httpResponse)
88+
throws ParseException, IOException {
89+
// 获取响应消息实体
90+
91+
HttpEntity entity = httpResponse.getEntity();
92+
// 响应状态
93+
System.out.println("status:" + httpResponse.getStatusLine());
94+
System.out.println("headers:");
95+
HeaderIterator iterator = httpResponse.headerIterator();
96+
while (iterator.hasNext()) {
97+
System.out.println("\t" + iterator.next());
98+
}
99+
// 判断响应实体是否为空
100+
// if (entity != null) {
101+
// String responseString = EntityUtils.toString(entity);
102+
// System.out.println("response length:" + responseString.length());
103+
// System.out.println("response content:"
104+
// + responseString.replace("\r\n", ""));
105+
// }
106+
InputStream responseBody = httpResponse.getEntity().getContent();
107+
StringBuilder stringBuilder = new StringBuilder();
108+
BufferedReader br = new BufferedReader(new InputStreamReader(
109+
responseBody));
110+
String line = null;
111+
while ((line = br.readLine()) != null) {
112+
stringBuilder.append("\n"+line);
113+
}
114+
System.out.println(stringBuilder.toString());
115+
XpathSelector xpathSelector = new XpathSelector("//table[@id='result_testcases']/tbody/tr/td/a[@class'status-accepted text-success']/@href");
116+
System.out.println(xpathSelector.selectList(stringBuilder.toString()));
117+
}
118+
119+
public static String getCookie(HttpResponse httpResponse) {
120+
Header[] headers = httpResponse.getAllHeaders();
121+
for (Header value : headers) {
122+
if (value.getName().equals("Set-Cookie")) {
123+
return value.getValue().split(";")[0].split("=")[1];
124+
}
125+
}
126+
return null;
127+
}
128+
129+
}

src/main/java/haodong/net/cn/test/Test.java renamed to src/main/java/console/URLConnectionExample.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package haodong.net.cn.test;
1+
package console;
22

33
import java.io.BufferedReader;
44
import java.io.IOException;
@@ -17,18 +17,19 @@
1717
* @author haodong
1818
*
1919
*/
20-
public class Test {
20+
public class URLConnectionExample {
2121
private static final String PASSWORD = "thd04180015";
2222
private static final String USERNAME = "tanghaodong25@163.com";
2323
private static final String LOGIN_STRING = "https://www.leetcode.com/accounts/login/";
2424
private static final String INDEX_STRING = "https://www.leetcode.com";
2525
private static final String ALGORITHMS = "https://www.leetcode.com/problemset/algorithms/";
2626
private static String cookie = null;
27-
27+
HttpURLConnection connection = null;
2828
public static void main(String[] args) throws IOException {
29-
Test test = new Test();
29+
URLConnectionExample test = new URLConnectionExample();
3030
test.getToken("tanghaodong25@163.com", "thd04180015", LOGIN_STRING);
3131
test.getAlgorithmsPage(ALGORITHMS);
32+
// CodeProcesser.portal(ALGORITHMS, cookie);
3233
}
3334
/**
3435
* 获得认证,得到登录token
@@ -37,7 +38,7 @@ public static void main(String[] args) throws IOException {
3738
* @param urlString
3839
*/
3940
public void getToken(String name, String password, String urlString) {
40-
HttpURLConnection connection = null;
41+
4142
OutputStream out = null;
4243
try {
4344
URL url = new URL(urlString);
@@ -117,8 +118,8 @@ public String getCookie(String urlString) {
117118
*/
118119
public void getAlgorithmsPage(String urlString) {
119120
try {
120-
URL url = new URL(urlString);
121-
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
121+
URL url = new URL("https://leetcode.com/problems/rotate-array/submissions/");
122+
connection = (HttpURLConnection) url.openConnection();
122123
connection.setRequestProperty("Cookie", cookie);
123124
BufferedReader br = new BufferedReader(new InputStreamReader(
124125
connection.getInputStream()));
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package container;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashMap;
5+
import java.util.List;
6+
import java.util.Map;
7+
import java.util.concurrent.ArrayBlockingQueue;
8+
import java.util.concurrent.BlockingQueue;
9+
/**
 * Container class.
 *
 * <p>URLContainer: holds three bounded string queues and one string-to-string
 * map. Judging by the class and field names the strings are presumably URLs
 * and source code collected while crawling — confirm against the users of
 * this class.
 *
 * @author kin (created 2015-03-29 06:50:58)
 * @version 1.0.0
 */
public class URLContainer {
    private static final int PROBLEM_QUEUE_CAPACITY = 200;
    private static final int PROBLEM_SUBMISSION_CAPACITY = 50;
    private static final int PROBLEM_CODE_PAGE_CAPACITY = 100;

    /** Bounded queue holding at most 200 entries. */
    protected BlockingQueue<String> problemQueue =
            new ArrayBlockingQueue<String>(PROBLEM_QUEUE_CAPACITY);

    /** Bounded queue holding at most 50 entries. */
    protected BlockingQueue<String> problemSubmission =
            new ArrayBlockingQueue<String>(PROBLEM_SUBMISSION_CAPACITY);

    /** Bounded queue holding at most 100 entries. */
    protected BlockingQueue<String> problemCodePage =
            new ArrayBlockingQueue<String>(PROBLEM_CODE_PAGE_CAPACITY);

    /** Plain (unsynchronized) hash map from string keys to string values. */
    protected Map<String, String> problemCode = new HashMap<String, String>();
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package downloader;
2+
3+
import java.util.List;
4+
5+
/**
 * Contract for the download steps of the crawler: problem list, problem
 * description page, submission list page and code page.
 */
public interface Downloader {

    /**
     * Sets the thread for this downloader.
     *
     * @param thread the thread to associate with the downloader
     */
    void setTread(Thread thread);

    /**
     * Downloads the target (a URL or code) found at the given URL.
     *
     * @param url the address to download from
     */
    void download(String url);

    /**
     * Crawls the problem list.
     *
     * @param url the address of the problem list
     */
    void problemListDownloader(String url);

    /**
     * Opens a problem description page and crawls the list of submission URLs
     * for that problem.
     *
     * @param url the address of the problem description page
     */
    void problemDescriptionDownloader(String url);

    /**
     * Opens a submission list page and crawls the URLs of the pages that
     * contain the code.
     *
     * @param url the address of the submission list page
     * @param name presumably the name of the problem the submissions belong
     *        to — confirm with the implementations
     */
    void submissionListDownloader(String url, String name);

    /**
     * Opens a code page and crawls the code.
     *
     * @param url the address of the page that contains the code
     */
    void codePageDownloader(String url);
}

0 commit comments

Comments
 (0)