|
3 | 3 | import java.util.List; |
4 | 4 | import java.util.concurrent.BlockingQueue; |
5 | 5 |
|
| 6 | +import org.apache.log4j.Logger; |
| 7 | +import org.apache.log4j.PropertyConfigurator; |
| 8 | + |
6 | 9 | import container.Container; |
7 | 10 | import downloader.HttpClientDownloader; |
8 | 11 |
|
9 | 12 | public class DispatchScheduler implements Scheduler { |
10 | | - private volatile boolean stoped; |
11 | | - private ParserTask task; |
12 | 13 | private HttpClientDownloader downloader = HttpClientDownloader.getInstance();; |
13 | | - |
14 | 14 | private static BlockingQueue<ParserTask> queue = Container.getQueue(); |
| 15 | + private static final String problemLinkPath = "//table[@class='table table-striped table-centered']/tbody/tr/td/a/@href"; //进入问题描述 Xpath路径 |
| 16 | + private static final String getProblemLinkPath = "//table[@class='table table-striped table-centered']/tbody/tr/td/a/text()"; //得到题目名称Xpath路径 |
| 17 | + private static final String submissionLinkPath = "//div[@class='row']/div/div/a/@href"; //进入题目提交页面Xpath路径 |
| 18 | + private static final String codePagePath = "//table[@id='result_testcases']/tbody/tr/td/a/@href"; //进入题目代码页面Xpath路径 |
| 19 | + private static final String codePageStatusPath = "//table[@id='result_testcases]/t" |
| 20 | + + "body/tr/td/a[@class='status-accepted text-success']/strong/text()"; //题目提交状态Xpath路径 |
| 21 | + private Logger myLog; |
| 22 | + /** |
| 23 | + * |
| 24 | + * startProcess(开始任务) |
| 25 | + *void |
| 26 | + * @exception |
| 27 | + * @since 1.0.0 |
| 28 | + */ |
| 29 | + public DispatchScheduler() { |
| 30 | + PropertyConfigurator.configure("test.log"); |
| 31 | + myLog = Logger.getLogger(DispatchScheduler.class); |
| 32 | + } |
15 | 33 | public void startProcess() { |
16 | 34 | if (queue.isEmpty()) { |
17 | 35 | try { |
18 | | - List<String> list = downloader.problemListDownloader(); |
19 | | - for (String value: list) { |
20 | | - task = new ParserTask(value); |
21 | | - task.isType(); |
22 | | - queue.put(task); |
23 | | - } |
| 36 | + myLog.debug("进入startProcess,试图初始化task queue"); |
| 37 | + List<String> list = downloader.problemListDownloader(problemLinkPath); |
| 38 | + setTask(list); |
24 | 39 | } catch (InterruptedException e) { |
25 | | - // TODO Auto-generated catch block |
26 | 40 | e.printStackTrace(); |
27 | 41 | } |
28 | 42 | } else { |
29 | 43 | try { |
30 | | - task = queue.take(); |
31 | | - String url = task.getUrl(); |
32 | | - task = new ParserTask(url); |
33 | | - task.isType(); |
34 | | - queue.put(task); |
| 44 | + ParserTask task = queue.take(); |
| 45 | + getDownloadResult(task); |
35 | 46 | } catch (InterruptedException e) { |
36 | | - // TODO Auto-generated catch block |
37 | 47 | e.printStackTrace(); |
38 | 48 | } |
39 | 49 | } |
40 | 50 | } |
| 51 | + |
| 52 | + /** |
| 53 | + * 爬取过程 |
| 54 | + */ |
41 | 55 | public void taskProcceed() { |
| 56 | + myLog.debug("进入task任务处理"); |
42 | 57 | while (!queue.isEmpty()) { |
43 | 58 | try { |
44 | | - task = queue.take(); |
| 59 | + myLog.debug("处理了一个任务"); |
| 60 | + ParserTask task = queue.take(); |
| 61 | + myLog.debug("取得的task的type类型为: " + task.getType()); |
| 62 | + getDownloadResult(task); |
45 | 63 | } catch (InterruptedException e1) { |
46 | | - // TODO Auto-generated catch block |
47 | 64 | e1.printStackTrace(); |
48 | 65 | } |
49 | | - String url = task.getUrl(); |
50 | | - task = new ParserTask(url); |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + /** |
| 70 | + * |
| 71 | + * getDownloadResult(根据已有的task获得新的task) |
| 72 | + * @param task |
| 73 | + * @return |
| 74 | + * @throws InterruptedException |
| 75 | + *ParserTask |
| 76 | + * @exception |
| 77 | + * @since 1.0.0 |
| 78 | + */ |
| 79 | + public void getDownloadResult(ParserTask task) throws InterruptedException { |
| 80 | + if (task.getType() == ParserTask.TaskType.GET_SUBMISION_URL) { |
| 81 | + myLog.debug("正在获取问题提交列表url"); |
| 82 | + List<String> list = downloader.problemDescriptionDownloader(task.getUrl(), submissionLinkPath); |
| 83 | + setTask(list); |
| 84 | + } else if (task.getType() == ParserTask.TaskType.GET_CODE_URL) { |
| 85 | + myLog.debug("正在获取代码页面url"); |
| 86 | + List<String> list = downloader.submissionListDownloader(task.getUrl(), codePagePath); |
| 87 | + setTask(list); |
| 88 | + } else if (task.getType() == ParserTask.TaskType.GET_CODE) { |
| 89 | + myLog.debug("正在获取代码"); |
| 90 | + //do something to get code |
| 91 | + } else { |
| 92 | + System.out.println("无法识别该url,请填写正确的url。"); |
| 93 | + } |
| 94 | + } |
| 95 | + |
| 96 | + /** |
| 97 | + * |
| 98 | + * setTask(辅助方法,根据从队列中取得的task,分析,爬取获得新的task) |
| 99 | + * @param list |
| 100 | + * @throws InterruptedException |
| 101 | + *void |
| 102 | + * @exception |
| 103 | + * @since 1.0.0 |
| 104 | + */ |
| 105 | + public void setTask(List<String> list) throws InterruptedException { |
| 106 | + for (String value: list) { |
| 107 | + ParserTask task = new ParserTask(new StringBuilder("https://leetcode.com").append(value).toString()); |
| 108 | + myLog.debug("task的url为: " + task.getUrl()); |
51 | 109 | task.isType(); |
52 | | - try { |
53 | | - queue.put(task); |
54 | | - } catch (InterruptedException e) { |
55 | | - // TODO Auto-generated catch block |
56 | | - e.printStackTrace(); |
57 | | - } |
| 110 | + queue.put(task); |
58 | 111 | } |
59 | 112 | } |
60 | 113 | } |
0 commit comments