
Commit 4d3a4a8

[python] Added more websites for getting free proxies
1 parent 3b70ceb

File tree

5 files changed: +119 -31 lines changed


Proxy_List_Scrapper/__init__.py

Lines changed: 45 additions & 11 deletions
@@ -6,11 +6,26 @@
 
 import sys
 import traceback
-from re import findall
+from re import findall, sub
 
 import requests
 from requests.exceptions import ConnectionError
 
+SSL = 'https://www.sslproxies.org/'
+GOOGLE = 'https://www.google-proxy.net/'
+ANANY = 'https://free-proxy-list.net/anonymous-proxy.html'
+UK = 'https://free-proxy-list.net/uk-proxy.html'
+US = 'https://www.us-proxy.org/'
+NEW = 'https://free-proxy-list.net/'
+SPYS_ME = 'http://spys.me/proxy.txt'
+PROXYSCRAPE = 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all'
+PROXYNOVA = 'https://www.proxynova.com/proxy-server-list/'
+PROXYLIST_DOWNLOAD_HTTP = 'https://www.proxy-list.download/HTTP'
+PROXYLIST_DOWNLOAD_HTTPS = 'https://www.proxy-list.download/HTTPS'
+PROXYLIST_DOWNLOAD_SOCKS4 = 'https://www.proxy-list.download/SOCKS4'
+PROXYLIST_DOWNLOAD_SOCKS5 = 'https://www.proxy-list.download/SOCKS5'
+ALL = 'ALL'
+
 
 class ScrapperException(BaseException):
     pass
@@ -20,6 +35,7 @@ class Proxies(object):
     """
     Proxies is the response data type of getProxies function
     """
+
     def __init__(self, proxies, category):
         """
         Initialize the proxies class
@@ -35,6 +51,7 @@ class Proxy(object):
     """
     Proxy is the class for proxy.
     """
+
     def __init__(self, ip, port):
         """
         Initialization of the proxy class
@@ -49,6 +66,7 @@ class Scrapper:
     """
     Scrapper class is use to scrape the proxies from various websites.
     """
+
     def __init__(self, category='ssl', print_err_trace=True):
         """
         Initialization of scrapper class
@@ -58,16 +76,22 @@ def __init__(self, category='ssl', print_err_trace=True):
         # init with Empty Proxy List
         self.proxies = []
         self.category = category
-        self.Categories = {'SSL': 'https://www.sslproxies.org/',
-                           'GOOGLE': 'https://www.google-proxy.net/',
-                           'ANANY': 'https://free-proxy-list.net/anonymous-proxy.html',
-                           'UK': 'https://free-proxy-list.net/uk-proxy.html',
-                           'US': 'https://www.us-proxy.org/',
-                           'NEW': 'https://free-proxy-list.net/',
-                           'SPYS.ME': 'http://spys.me/proxy.txt',
-                           'proxyscrape': 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all',
-                           'ALL': 'ALL'
-                           }
+        self.Categories = {
+            'SSL': SSL,
+            'GOOGLE': GOOGLE,
+            'ANANY': ANANY,
+            'UK': UK,
+            'US': US,
+            'NEW': NEW,
+            'SPYS.ME': SPYS_ME,
+            'PROXYSCRAPE': PROXYSCRAPE,
+            'PROXYNOVA': PROXYNOVA,
+            'PROXYLIST_DOWNLOAD_HTTP': PROXYLIST_DOWNLOAD_HTTP,
+            'PROXYLIST_DOWNLOAD_HTTPS': PROXYLIST_DOWNLOAD_HTTPS,
+            'PROXYLIST_DOWNLOAD_SOCKS4': PROXYLIST_DOWNLOAD_SOCKS4,
+            'PROXYLIST_DOWNLOAD_SOCKS5': PROXYLIST_DOWNLOAD_SOCKS5,
+            'ALL': ALL
+        }
         self.print_trace = print_err_trace
 
     def getProxies(self):
@@ -100,6 +124,16 @@ def _get(self):
         r = requests.get(url=self.Categories[self.category])
         if self.category == 'SPYS.ME' or self.category == 'proxyscrape':
             self.proxies = findall(pattern=r'\d+\.\d+\.\d+\.\d+:\d+', string=r.text)
+        elif self.category == 'PROXYNOVA':
+            matches = findall(
+                pattern=r'\d+\.\d+\.\d+\.\d+\'\)\;</script>\s*</abbr>\s*</td>\s*<td\salign=\"left\">\s*\d+',
+                string=r.text)
+            # collapse the markup between IP and port into a ":" separator
+            self.proxies = [sub(r"\'\)\;</script>\s*</abbr>\s*</td>\s*<td\salign=\"left\">\s*", ":", m)
+                            for m in matches]
+        elif self.category in {'PROXYLIST_DOWNLOAD_HTTP', 'PROXYLIST_DOWNLOAD_HTTPS',
+                               'PROXYLIST_DOWNLOAD_SOCKS4', 'PROXYLIST_DOWNLOAD_SOCKS5'}:
+            matches = findall(pattern=r'\d+\.\d+\.\d+\.\d+</td>\s*<td>\d+', string=r.text)
+            self.proxies = [sub(r"</td>\s*<td>", ":", m) for m in matches]
         else:
             matches = findall(pattern=r'\d+\.\d+\.\d+\.\d+</td><td>\d+', string=r.text)
             self.proxies = [m.replace('</td><td>', ':') for m in matches]
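
All of the new parsing branches share one two-step idiom: findall captures each "IP ...markup... port" span, then sub collapses the markup between the two numbers into a ":" separator. A self-contained sketch of that idiom, run against a made-up fragment shaped like the proxy-list.download table rows (the sample HTML below is illustrative, not a copy of the live page):

    from re import findall, sub

    # Illustrative sample only: two table rows in the ip-cell / port-cell
    # shape that the PROXYLIST_DOWNLOAD_* branch above expects.
    sample_html = """
    <tr><td>203.0.113.7</td>
        <td>8080</td></tr>
    <tr><td>198.51.100.23</td>
        <td>3128</td></tr>
    """

    # Step 1: capture each "ip</td>...<td>port" span, tolerating whitespace.
    matches = findall(r'\d+\.\d+\.\d+\.\d+</td>\s*<td>\d+', sample_html)

    # Step 2: collapse the markup between IP and port into ":".
    proxies = [sub(r'</td>\s*<td>', ':', m) for m in matches]

    print(proxies)  # ['203.0.113.7:8080', '198.51.100.23:3128']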

README.md

Lines changed: 47 additions & 12 deletions
@@ -17,6 +17,15 @@
 Proxy List Scrapper from various websites.
 They gives the free proxies for temporary use.
 
+### What is a proxy
+A proxy is a server that acts as a gateway or intermediary between a device and the rest of the internet. A proxy accepts and forwards connection requests, then returns data for those requests. This is only the basic definition; there are dozens of unique proxy types with their own distinct configurations.
+
+### What are the most popular types of proxies:
+Residential proxies, Datacenter proxies, Anonymous proxies, Transparent proxies
+
+### People use proxies to:
+Avoid Geo-restrictions, Protect Privacy and Increase Security, Avoid Firewalls and Bans, Automate Online Processes, Use Multiple Accounts and Gather Data
+
 #### Chrome Extension in here
 you can download the chrome extension "Free Proxy List Scrapper Chrome Extension" folder and load in the extension.<br/>
 ##### Goto Chrome Extension <a href="https://chrome.google.com/webstore/detail/free-proxy-list-scrapper/jpnflejagpflcemgfnhckkdckpkkfbcc?hl=en-US">click here</a>.
@@ -44,15 +53,20 @@ After that simply create an object of Scrapper class as "scrapper"
 
 Here Your need to specify category defined as below:<br/>
 
-    'SSL': 'https://www.sslproxies.org/',
-    'GOOGLE': 'https://www.google-proxy.net/',
-    'ANANY': 'https://free-proxy-list.net/anonymous-proxy.html',
-    'UK': 'https://free-proxy-list.net/uk-proxy.html',
-    'US': 'https://www.us-proxy.org/',
-    'NEW': 'https://free-proxy-list.net/',
-    'SPYS.ME': 'http://spys.me/proxy.txt',
-    'proxyscrape': 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all',
-    'ALL': 'ALL'
+    SSL = 'https://www.sslproxies.org/'
+    GOOGLE = 'https://www.google-proxy.net/'
+    ANANY = 'https://free-proxy-list.net/anonymous-proxy.html'
+    UK = 'https://free-proxy-list.net/uk-proxy.html'
+    US = 'https://www.us-proxy.org/'
+    NEW = 'https://free-proxy-list.net/'
+    SPYS_ME = 'http://spys.me/proxy.txt'
+    PROXYSCRAPE = 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all'
+    PROXYNOVA = 'https://www.proxynova.com/proxy-server-list/'
+    PROXYLIST_DOWNLOAD_HTTP = 'https://www.proxy-list.download/HTTP'
+    PROXYLIST_DOWNLOAD_HTTPS = 'https://www.proxy-list.download/HTTPS'
+    PROXYLIST_DOWNLOAD_SOCKS4 = 'https://www.proxy-list.download/SOCKS4'
+    PROXYLIST_DOWNLOAD_SOCKS5 = 'https://www.proxy-list.download/SOCKS5'
+    ALL = 'ALL'
 
 These are all categories.<br/>
 After you have to call a function named "getProxies"<br/>
@@ -85,9 +99,28 @@ in data having proxies,len,category
     print(data.category)
 
 ## Author
-Sameer Narkhede <br/>
-Profile : https://github.com/narkhedesam <br/>
-Website : https://narkhedesam.github.io/
+<b>Sameer Narkhede</b> <br/>
+<p align="left">
+    <a href="https://github.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/github.svg" alt="https://github.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://narkhedesam.com/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/googlechrome.svg" alt="https://narkhedesam.com/" height="20" width="20" />
+    </a>
+    <a href="https://www.linkedin.com/in/sameer-narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/linkedin.svg" alt="https://www.linkedin.com/in/sameer-narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://www.facebook.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/facebook.svg" alt="https://www.facebook.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://www.instagram.com/sam_narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/instagram.svg" alt="https://www.instagram.com/sam_narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://t.me/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/telegram.svg" alt="https://t.me/narkhedesam" height="20" width="20" />
+    </a>
+
+</p>
 
 ### Thanks for giving free proxies
 - https://www.sslproxies.org/
@@ -98,6 +131,8 @@ Website : https://narkhedesam.github.io/
 - https://free-proxy-list.net/
 - http://spys.me/proxy.txt
 - https://proxyscrape.com/
+- https://www.proxynova.com/proxy-server-list/
+- https://www.proxy-list.download/
 <br/><br/>
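
Taken together with the usage steps above (construct a Scrapper with a category name, call getProxies, read proxies, len, and category off the result), the new categories slot in as below. A minimal sketch, assuming the package is installed from PyPI, that data.proxies holds the Proxy objects defined in __init__.py (each carrying ip and port), and that the sites are reachable; the final request through httpbin.org is purely illustrative, and free proxies fail often, so treat the output as best-effort:

    import requests
    from Proxy_List_Scrapper import Scrapper

    # One of the categories added in this commit.
    scrapper = Scrapper(category='PROXYLIST_DOWNLOAD_HTTPS', print_err_trace=False)
    data = scrapper.getProxies()

    # The README documents proxies, len, and category on the response object.
    print(data.category, data.len)

    # Assumption: each entry is a Proxy object carrying ip and port.
    for proxy in data.proxies[:5]:
        print(f'{proxy.ip}:{proxy.port}')

    # Per the "People use proxies to" section: route one request through a
    # scraped proxy. Free proxies are unreliable, hence the short timeout.
    if data.proxies:
        address = f'http://{data.proxies[0].ip}:{data.proxies[0].port}'
        r = requests.get('https://httpbin.org/ip',
                         proxies={'http': address, 'https': address},
                         timeout=10)
        print(r.json())  # should show the proxy's IP, not yours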

Web_Scrapper/README.md

Lines changed: 23 additions & 4 deletions
@@ -34,11 +34,30 @@ get response from scrape.do api
 
 
 ## Author
-Sameer Narkhede <br/>
-Profile : https://github.com/narkhedesam <br/>
-Website : https://narkhedesam.github.io/
+<b>Sameer Narkhede</b> <br/>
+<p align="left">
+    <a href="https://github.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/github.svg" alt="https://github.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://narkhedesam.com/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/googlechrome.svg" alt="https://narkhedesam.com/" height="20" width="20" />
+    </a>
+    <a href="https://www.linkedin.com/in/sameer-narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/linkedin.svg" alt="https://www.linkedin.com/in/sameer-narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://www.facebook.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/facebook.svg" alt="https://www.facebook.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://www.instagram.com/sam_narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/instagram.svg" alt="https://www.instagram.com/sam_narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://t.me/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/telegram.svg" alt="https://t.me/narkhedesam" height="20" width="20" />
+    </a>
 
-<h5>special thanks to Batuhan Özyön - https://github.com/bynf </h5>
+</p>
+
+<h5>special thanks to <b>Batuhan Özyön</b> - https://github.com/bynf </h5>
 
 
 ## Screenshot
Binary file (7.25 KB) not shown.

setup.py

Lines changed: 4 additions & 4 deletions
@@ -8,19 +8,19 @@
 
 setup(
     name='Proxy-List-Scrapper',
-    version='0.1.6',
+    version='0.2.0',
     packages=find_packages(),
-    url='',
+    url='https://pypi.org/project/Proxy-List-Scrapper/',
     license='MIT License',
     author='Sameer Narkhede',
     author_email='narkhedesam@gmail.com',
-    description='proxy list scrapper from various websites. they gives the free proxies for temporary use.',
+    description='Proxy list scrapper from various websites. They gives the free proxies for temporary use.',
     # other arguments omitted
     long_description=long_description,
     long_description_content_type='text/markdown',
     install_requires=[
         'requests',
-    ],
+    ],
     include_package_data=True,
 
 )
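
Once the 0.2.0 release is on PyPI (the new url points at the project page), the bumped version string is what packaging tools report. A quick check, assuming the release is installed:

    # Assumes: pip install -U Proxy-List-Scrapper
    from importlib.metadata import version  # Python 3.8+

    print(version('Proxy-List-Scrapper'))  # expected: 0.2.0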
