Skip to content

Commit bc9e4ab

Browse files
committed
delete download method
1 parent 6afeb90 commit bc9e4ab

File tree

1 file changed

+125
-138
lines changed

1 file changed

+125
-138
lines changed

src/main/java/downloader/HttpClientDownloader.java

Lines changed: 125 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import org.apache.http.impl.client.HttpClients;
2828
import org.apache.http.message.BasicNameValuePair;
2929
import org.apache.http.util.EntityUtils;
30+
import org.apache.log4j.Logger;
31+
import org.apache.log4j.PropertyConfigurator;
3032

3133
import us.codecraft.webmagic.selector.XpathSelector;
3234

@@ -40,38 +42,43 @@ public class HttpClientDownloader implements Downloader {
4042
private static final String problemCodeURL = ".*/submissions/detail/.*"; //检验是否是题目代码页面
4143
private static final String problemListURL = ".*/problemset/algorithms/$"; //检验是否是题目列表页面
4244

43-
private static final String problemLinkPath = "//table[@class='table table-striped table-centered']/tbody/tr/td/a/@href"; //进入问题描述 Xpath路径
44-
private static final String getProblemLinkPath = "//table[@class='table table-striped table-centered']/tbody/tr/td/a/text()"; //得到题目名称Xpath路径
45-
private static final String submissionLinkPath = "//div[@class='row']/div/div/a/@href"; //进入题目提交页面Xpath路径
46-
private static final String codePagePath = "//table[@id='result_testcases']/tbody/tr/td/a/@href"; //进入题目代码页面Xpath路径
47-
private static final String codePageStatusPath = "//table[@id='result_testcases]/t"
48-
+ "body/tr/td/a[@class='status-accepted text-success']/strong/text()"; //题目提交状态Xpath路径
49-
5045
private static final String problemNamePath = "//div[@class='col-md-12']/h4/a/text()"; //题目名称Xpath路径
5146
private static final String codePath = "//div[@class='ace_content']/text()"; //得到代码Xpath路径
5247
private static CloseableHttpClient httpClient;
5348
private static final HttpClientDownloader downloader = new HttpClientDownloader();
49+
private static Logger myLog;
5450

5551
public static HttpClientDownloader getInstance() {
5652
return downloader;
5753
}
54+
55+
/**
56+
*
57+
* init(初始化httpclient,以单例模式保存httpclient)
58+
* @param username
59+
* @param password
60+
*void
61+
* @exception
62+
* @since 1.0.0
63+
*/
5864
public static void init(String username, String password) {
65+
PropertyConfigurator.configure("test.log");
66+
myLog = Logger.getLogger(HttpClientDownloader.class);
5967
httpClient = HttpClients.createDefault();
6068
HttpGet httpGet = new HttpGet(INDEX_URL);
6169
CloseableHttpResponse response1;
6270
String cookieString = null;
6371
HttpEntity entity1;
6472
try {
6573
response1 = httpClient.execute(httpGet);
66-
printResponse(response1);
74+
//printResponse(response1);
75+
myLog.debug("第一次登录,试图获取cookie值,response状态为:" + response1.getStatusLine());
6776
cookieString = getCookie(response1);
6877
entity1 = response1.getEntity();
6978
EntityUtils.consume(entity1);
7079
} catch (ClientProtocolException e1) {
71-
// TODO Auto-generated catch block
7280
e1.printStackTrace();
7381
} catch (IOException e1) {
74-
// TODO Auto-generated catch block
7582
e1.printStackTrace();
7683
}
7784

@@ -89,7 +96,8 @@ public static void init(String username, String password) {
8996
try {
9097
httpPost.setEntity(new UrlEncodedFormEntity(getParam(map), "UTF-8"));
9198
response1 = httpClient.execute(httpPost);
92-
printResponse(response1);
99+
//printResponse(response1);
100+
myLog.debug("试图登录leetcode网站, response状态为: " + response1.getStatusLine());
93101
entity1 = response1.getEntity();
94102
EntityUtils.consume(entity1);
95103
} catch (UnsupportedEncodingException e) {
@@ -99,115 +107,112 @@ public static void init(String username, String password) {
99107
} catch (IOException e) {
100108
System.out.println(e.getMessage());
101109
}
110+
myLog.debug("HttpClientDownloader初始化完成");
102111
}
103112

104113
public void setTread(Thread thread) {
105-
// TODO Auto-generated method stub
114+
115+
}
116+
117+
/**
118+
* 获取问题列表
119+
*/
120+
public List<String> problemListDownloader(String pattern) {
121+
myLog.debug("尝试获取问题列表");
122+
return doDispatcher(ALGORITHMS, pattern);
123+
}
106124

125+
/**
126+
* 根据问题描述信息所在的url,爬取问题提交列表对应的url
127+
*/
128+
public List<String> problemDescriptionDownloader(String url, String pattern) {
129+
myLog.debug("尝试获取问题提交记录列表");
130+
return doDispatcher(url, pattern);
131+
}
132+
133+
public List<String> submissionListDownloader(String url, String pattern) {
134+
myLog.debug("尝试获取代码页面url");
135+
return doDispatcher(url, pattern);
107136
}
108137

109-
public void download(String url) {
110-
CloseableHttpClient httpClient = HttpClients.createDefault();
111-
HttpGet httpGet = new HttpGet("https://leetcode.com");
112-
CloseableHttpResponse response1;
113-
String cookieString = null;
114-
HttpEntity entity1;
115-
try {
116-
response1 = httpClient.execute(httpGet);
117-
printResponse(response1);
118-
cookieString = getCookie(response1);
119-
entity1 = response1.getEntity();
120-
EntityUtils.consume(entity1);
121-
} catch (ClientProtocolException e1) {
122-
System.out.println(e1.getMessage());
123-
} catch (IOException e1) {
124-
System.out.println(e1.getMessage());
125-
}
138+
public List<String> codePageDownloader(String url, String pattern) {
139+
myLog.debug("尝试获取代码");
140+
//爬取代码
141+
return null;
142+
}
126143

127-
HttpPost httpPost = new HttpPost("https://leetcode.com/accounts/login/");
128-
httpPost.addHeader(
129-
"User-Agent",
130-
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6)Gecko/20091201 Firefox/3.5.6");
131-
httpPost.addHeader("Referer", "https://leetcode.com/accounts/login/");
132-
httpPost.addHeader("Origin", "https://leetcode.com");
133-
Map<String, String> map = new HashMap<String, String>();
134-
map.put("login", "tanghaodong25@163.com");
135-
map.put("password", "thd04180015");
136-
map.put("csrfmiddlewaretoken", cookieString);
137-
map.put("remember", "on");
144+
/**
145+
*
146+
* doDispatcher()
147+
* (适用于获取问题详情url和获取问题提交列表url)
148+
* @param url
149+
* @param pattern
150+
* @return List<String>
151+
* @exception
152+
* @since 1.0.0
153+
*/
154+
public List<String> doDispatcher(String url, String pattern) {
155+
HttpGet httpGet1 = new HttpGet(url);
156+
myLog.debug("进入doDispatcher函数,准备对"+url+"进行解析");
138157
try {
139-
httpPost.setEntity(new UrlEncodedFormEntity(getParam(map), "UTF-8"));
140-
response1 = httpClient.execute(httpPost);
141-
printResponse(response1);
142-
entity1 = response1.getEntity();
158+
HttpResponse response1 = httpClient.execute(httpGet1);
159+
myLog.debug("正在对"+url+"进行访问,response状态为:" + response1.getStatusLine());
160+
myLog.debug("试图对"+url+"进行解析,其中pattern为:"+pattern);
161+
XpathSelector xpathSelector = new XpathSelector(pattern);
162+
List<String> listTmp = xpathSelector.selectList(getHtml(response1));
163+
HttpEntity entity1 = response1.getEntity();
143164
EntityUtils.consume(entity1);
144-
} catch (UnsupportedEncodingException e) {
145-
System.out.println(e.getMessage());
165+
return listTmp;
146166
} catch (ClientProtocolException e) {
147167
System.out.println(e.getMessage());
148168
} catch (IOException e) {
149169
System.out.println(e.getMessage());
150170
}
171+
return null;
151172
}
152-
153-
public static List<NameValuePair> getParam(Map parameterMap) {
154-
List<NameValuePair> param = new ArrayList<NameValuePair>();
155-
Iterator it = parameterMap.entrySet().iterator();
156-
while (it.hasNext()) {
157-
Entry parmEntry = (Entry) it.next();
158-
param.add(new BasicNameValuePair((String) parmEntry.getKey(),
159-
(String) parmEntry.getValue()));
160-
}
161-
return param;
162-
}
163-
173+
174+
/**
175+
*
176+
* printResponse(打印response信息)
177+
* @param httpResponse
178+
* @throws ParseException
179+
* @throws IOException
180+
*void
181+
* @exception
182+
* @since 1.0.0
183+
*/
164184
public static void printResponse(HttpResponse httpResponse)
165185
throws ParseException, IOException {
166186
// 获取响应消息实体
167187
HttpEntity entity = httpResponse.getEntity();
168188
// 响应状态
169189
System.out.println("status:" + httpResponse.getStatusLine());
170190
System.out.println("headers:");
171-
HeaderIterator iterator = httpResponse.headerIterator();
172-
while (iterator.hasNext()) {
173-
System.out.println("\t" + iterator.next());
174-
}
175-
// 判断响应实体是否为空
176-
if (entity != null) {
177-
String responseString = EntityUtils.toString(entity);
178-
System.out.println("response length:" + responseString.length());
179-
System.out.println("response content:"
180-
+ responseString.replace("\r\n", ""));
181-
}
182-
}
183-
184-
public static String getCookie(HttpResponse httpResponse) {
185-
Header[] headers = httpResponse.getAllHeaders();
186-
for (Header value : headers) {
187-
if (value.getName().equals("Set-Cookie")) {
188-
return value.getValue().split(";")[0].split("=")[1];
189-
}
190-
}
191-
return null;
191+
// HeaderIterator iterator = httpResponse.headerIterator();
192+
// while (iterator.hasNext()) {
193+
// System.out.println("\t" + iterator.next());
194+
// }
195+
// // 判断响应实体是否为空
196+
// if (entity != null) {
197+
// String responseString = EntityUtils.toString(entity);
198+
// System.out.println("response length:" + responseString.length());
199+
// System.out.println("response content:"
200+
// + responseString.replace("\r\n", ""));
201+
// }
202+
PropertyConfigurator.configure("test.log");
203+
Logger m_log = Logger.getLogger(HttpClientDownloader.class);
204+
m_log.debug("hello world");
192205
}
193206

194-
public List<String> problemListDownloader() {
195-
return doDispatcher(ALGORITHMS);
196-
}
197-
198-
public List<String> problemDescriptionDownloader(String url) {
199-
return doDispatcher(url);
200-
}
201-
202-
public List<String> submissionListDownloader(String url, String name) {
203-
return doDispatcher(url, name, httpClient);
204-
}
205-
206-
public List<String> codePageDownloader(String url) {
207-
//爬取代码
208-
return null;
209-
}
210-
207+
/**
208+
*
209+
* getHtml(辅助方法,根据response信息返回String类型的html内容)
210+
* @param response
211+
* @return
212+
*String
213+
* @exception
214+
* @since 1.0.0
215+
*/
211216
public String getHtml(HttpResponse response) {
212217
BufferedReader br = null;
213218
try {
@@ -233,61 +238,43 @@ public String getHtml(HttpResponse response) {
233238
}
234239
return null;
235240
}
241+
236242
/**
237243
*
238-
* doDispatcher()
239-
* (适用于获取问题详情url和获取问题提交列表url)
240-
* @param url
241-
* @param blockingQueue
242-
*void
244+
* getCookie(获取leetcode网站的Set-Cookie值)
245+
* @param httpResponse
246+
* @return
247+
*String
243248
* @exception
244249
* @since 1.0.0
245250
*/
246-
public List<String> doDispatcher(String url) {
247-
HttpGet httpGet1 = new HttpGet(url);
248-
try {
249-
HttpResponse response1 = httpClient.execute(httpGet1);
250-
printResponse(response1);
251-
252-
XpathSelector xpathSelector = new XpathSelector(problemLinkPath);
253-
List<String> listTmp = xpathSelector.selectList(getHtml(response1));
254-
HttpEntity entity1 = response1.getEntity();
255-
EntityUtils.consume(entity1);
256-
return listTmp;
257-
} catch (ClientProtocolException e) {
258-
System.out.println(e.getMessage());
259-
} catch (IOException e) {
260-
System.out.println(e.getMessage());
251+
public static String getCookie(HttpResponse httpResponse) {
252+
Header[] headers = httpResponse.getAllHeaders();
253+
for (Header value : headers) {
254+
if (value.getName().equals("Set-Cookie")) {
255+
return value.getValue().split(";")[0].split("=")[1];
256+
}
261257
}
262258
return null;
263259
}
260+
264261
/**
265262
*
266-
* doDispatcher()
267-
* (用于获取accept代码url)
268-
* @param url
269-
* @param blockingQueue
270-
* @param name
271-
*void
263+
* getParam(辅助方法)
264+
* @param parameterMap
265+
* @return
266+
*List<NameValuePair>
272267
* @exception
273268
* @since 1.0.0
274269
*/
275-
public List<String> doDispatcher(String url, String name, CloseableHttpClient httpClient) {
276-
HttpGet httpGet1 = new HttpGet(url);
277-
try {
278-
HttpResponse response1 = httpClient.execute(httpGet1);
279-
printResponse(response1);
280-
281-
XpathSelector xpathSelector = new XpathSelector(problemLinkPath);
282-
List<String> listTmp = xpathSelector.selectList(getHtml(response1));
283-
HttpEntity entity1 = response1.getEntity();
284-
EntityUtils.consume(entity1);
285-
return listTmp;
286-
} catch (ClientProtocolException e) {
287-
System.out.println(e.getMessage());
288-
} catch (IOException e) {
289-
System.out.println(e.getMessage());
270+
public static List<NameValuePair> getParam(Map parameterMap) {
271+
List<NameValuePair> param = new ArrayList<NameValuePair>();
272+
Iterator it = parameterMap.entrySet().iterator();
273+
while (it.hasNext()) {
274+
Entry parmEntry = (Entry) it.next();
275+
param.add(new BasicNameValuePair((String) parmEntry.getKey(),
276+
(String) parmEntry.getValue()));
290277
}
291-
return null;
278+
return param;
292279
}
293280
}

0 commit comments

Comments
 (0)