
Commit 4d3a4a8

[python] Added more websites for getting free proxies
1 parent 3b70ceb

File tree

5 files changed: +119 -31 lines changed


Proxy_List_Scrapper/__init__.py

Lines changed: 45 additions & 11 deletions
@@ -6,11 +6,26 @@
 
 import sys
 import traceback
-from re import findall
+from re import findall, sub
 
 import requests
 from requests.exceptions import ConnectionError
 
+SSL = 'https://www.sslproxies.org/'
+GOOGLE = 'https://www.google-proxy.net/'
+ANANY = 'https://free-proxy-list.net/anonymous-proxy.html'
+UK = 'https://free-proxy-list.net/uk-proxy.html'
+US = 'https://www.us-proxy.org/'
+NEW = 'https://free-proxy-list.net/'
+SPYS_ME = 'http://spys.me/proxy.txt'
+PROXYSCRAPE = 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all'
+PROXYNOVA = 'https://www.proxynova.com/proxy-server-list/'
+PROXYLIST_DOWNLOAD_HTTP = 'https://www.proxy-list.download/HTTP'
+PROXYLIST_DOWNLOAD_HTTPS = 'https://www.proxy-list.download/HTTPS'
+PROXYLIST_DOWNLOAD_SOCKS4 = 'https://www.proxy-list.download/SOCKS4'
+PROXYLIST_DOWNLOAD_SOCKS5 = 'https://www.proxy-list.download/SOCKS5'
+ALL = 'ALL'
+
 
 class ScrapperException(BaseException):
     pass
@@ -20,6 +35,7 @@ class Proxies(object):
     """
     Proxies is the response data type of getProxies function
     """
+
     def __init__(self, proxies, category):
         """
         Initialize the proxies class
@@ -35,6 +51,7 @@ class Proxy(object):
     """
     Proxy is the class for proxy.
     """
+
     def __init__(self, ip, port):
         """
         Initialization of the proxy class
@@ -49,6 +66,7 @@ class Scrapper:
     """
     Scrapper class is use to scrape the proxies from various websites.
     """
+
     def __init__(self, category='ssl', print_err_trace=True):
         """
         Initialization of scrapper class
@@ -58,16 +76,22 @@ def __init__(self, category='ssl', print_err_trace=True):
         # init with Empty Proxy List
         self.proxies = []
         self.category = category
-        self.Categories = {'SSL': 'https://www.sslproxies.org/',
-                           'GOOGLE': 'https://www.google-proxy.net/',
-                           'ANANY': 'https://free-proxy-list.net/anonymous-proxy.html',
-                           'UK': 'https://free-proxy-list.net/uk-proxy.html',
-                           'US': 'https://www.us-proxy.org/',
-                           'NEW': 'https://free-proxy-list.net/',
-                           'SPYS.ME': 'http://spys.me/proxy.txt',
-                           'proxyscrape': 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all',
-                           'ALL': 'ALL'
-                           }
+        self.Categories = {
+            'SSL': SSL,
+            'GOOGLE': GOOGLE,
+            'ANANY': ANANY,
+            'UK': UK,
+            'US': US,
+            'NEW': NEW,
+            'SPYS.ME': SPYS_ME,
+            'PROXYSCRAPE': PROXYSCRAPE,
+            'PROXYNOVA': PROXYNOVA,
+            'PROXYLIST_DOWNLOAD_HTTP': PROXYLIST_DOWNLOAD_HTTP,
+            'PROXYLIST_DOWNLOAD_HTTPS': PROXYLIST_DOWNLOAD_HTTPS,
+            'PROXYLIST_DOWNLOAD_SOCKS4': PROXYLIST_DOWNLOAD_SOCKS4,
+            'PROXYLIST_DOWNLOAD_SOCKS5': PROXYLIST_DOWNLOAD_SOCKS5,
+            'ALL': ALL
+        }
         self.print_trace = print_err_trace
 
     def getProxies(self):
@@ -100,6 +124,16 @@ def _get(self):
         r = requests.get(url=self.Categories[self.category])
         if self.category == 'SPYS.ME' or self.category == 'proxyscrape':
             self.proxies = findall(pattern=r'\d+\.\d+\.\d+\.\d+:\d+', string=r.text)
+        elif self.category == 'PROXYNOVA':
+            matches = findall(
+                pattern=r'\d+\.\d+\.\d+\.\d+\'\)\;</script>\s*</abbr>\s*</td>\s*<td\salign=\"left\">\s*\d+',
+                string=r.text)
+            # collapse the markup between IP and port into a ":" separator
+            self.proxies = [sub(r"\'\)\;</script>\s*</abbr>\s*</td>\s*<td\salign=\"left\">\s*", ":", m)
+                            for m in matches]
+        elif self.category in {'PROXYLIST_DOWNLOAD_HTTP', 'PROXYLIST_DOWNLOAD_HTTPS',
+                               'PROXYLIST_DOWNLOAD_SOCKS4', 'PROXYLIST_DOWNLOAD_SOCKS5'}:
+            matches = findall(pattern=r'\d+\.\d+\.\d+\.\d+</td>\s*<td>\d+', string=r.text)
+            self.proxies = [sub(r"</td>\s*<td>", ":", m) for m in matches]
         else:
             matches = findall(pattern=r'\d+\.\d+\.\d+\.\d+</td><td>\d+', string=r.text)
             self.proxies = [m.replace('</td><td>', ':') for m in matches]
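
All of the new parsing branches share one two-step idiom: findall captures each "IP ...markup... port" span, then sub collapses the markup between the two numbers into a ":" separator. A self-contained sketch of that idiom, run against a made-up fragment shaped like the proxy-list.download table rows (the sample HTML below is illustrative, not a copy of the live page):

    from re import findall, sub

    # Illustrative sample only: two table rows in the ip-cell / port-cell
    # shape that the PROXYLIST_DOWNLOAD_* branch above expects.
    sample_html = """
    <tr><td>203.0.113.7</td>
        <td>8080</td></tr>
    <tr><td>198.51.100.23</td>
        <td>3128</td></tr>
    """

    # Step 1: capture each "ip</td>...<td>port" span, tolerating whitespace.
    matches = findall(r'\d+\.\d+\.\d+\.\d+</td>\s*<td>\d+', sample_html)

    # Step 2: collapse the markup between IP and port into ":".
    proxies = [sub(r'</td>\s*<td>', ':', m) for m in matches]

    print(proxies)  # ['203.0.113.7:8080', '198.51.100.23:3128']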

README.md

Lines changed: 47 additions & 12 deletions
@@ -17,6 +17,15 @@
 Proxy List Scrapper from various websites.
 They gives the free proxies for temporary use.
 
+### What is a proxy
+A proxy is a server that acts as a gateway or intermediary between a device and the rest of the internet. A proxy accepts and forwards connection requests, then returns data for those requests. This is only the basic definition; there are dozens of unique proxy types with their own distinct configurations.
+
+### What are the most popular types of proxies:
+Residential proxies, Datacenter proxies, Anonymous proxies, Transparent proxies
+
+### People use proxies to:
+Avoid Geo-restrictions, Protect Privacy and Increase Security, Avoid Firewalls and Bans, Automate Online Processes, Use Multiple Accounts and Gather Data
+
 #### Chrome Extension in here
 you can download the chrome extension "Free Proxy List Scrapper Chrome Extension" folder and load in the extension.<br/>
 ##### Goto Chrome Extension <a href="https://chrome.google.com/webstore/detail/free-proxy-list-scrapper/jpnflejagpflcemgfnhckkdckpkkfbcc?hl=en-US">click here</a>.
@@ -44,15 +53,20 @@ After that simply create an object of Scrapper class as "scrapper"
 
 Here Your need to specify category defined as below:<br/>
 
-    'SSL': 'https://www.sslproxies.org/',
-    'GOOGLE': 'https://www.google-proxy.net/',
-    'ANANY': 'https://free-proxy-list.net/anonymous-proxy.html',
-    'UK': 'https://free-proxy-list.net/uk-proxy.html',
-    'US': 'https://www.us-proxy.org/',
-    'NEW': 'https://free-proxy-list.net/',
-    'SPYS.ME': 'http://spys.me/proxy.txt',
-    'proxyscrape': 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all',
-    'ALL': 'ALL'
+    SSL = 'https://www.sslproxies.org/'
+    GOOGLE = 'https://www.google-proxy.net/'
+    ANANY = 'https://free-proxy-list.net/anonymous-proxy.html'
+    UK = 'https://free-proxy-list.net/uk-proxy.html'
+    US = 'https://www.us-proxy.org/'
+    NEW = 'https://free-proxy-list.net/'
+    SPYS_ME = 'http://spys.me/proxy.txt'
+    PROXYSCRAPE = 'https://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all'
+    PROXYNOVA = 'https://www.proxynova.com/proxy-server-list/'
+    PROXYLIST_DOWNLOAD_HTTP = 'https://www.proxy-list.download/HTTP'
+    PROXYLIST_DOWNLOAD_HTTPS = 'https://www.proxy-list.download/HTTPS'
+    PROXYLIST_DOWNLOAD_SOCKS4 = 'https://www.proxy-list.download/SOCKS4'
+    PROXYLIST_DOWNLOAD_SOCKS5 = 'https://www.proxy-list.download/SOCKS5'
+    ALL = 'ALL'
 
 These are all categories.<br/>
 After you have to call a function named "getProxies"<br/>
@@ -85,9 +99,28 @@ in data having proxies,len,category
     print(data.category)
 
 ## Author
-Sameer Narkhede <br/>
-Profile : https://github.com/narkhedesam <br/>
-Website : https://narkhedesam.github.io/
+<b>Sameer Narkhede</b> <br/>
+<p align="left">
+    <a href="https://github.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/github.svg" alt="https://github.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://narkhedesam.com/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/googlechrome.svg" alt="https://narkhedesam.com/" height="20" width="20" />
+    </a>
+    <a href="https://www.linkedin.com/in/sameer-narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/linkedin.svg" alt="https://www.linkedin.com/in/sameer-narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://www.facebook.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/facebook.svg" alt="https://www.facebook.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://www.instagram.com/sam_narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/instagram.svg" alt="https://www.instagram.com/sam_narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://t.me/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/telegram.svg" alt="https://t.me/narkhedesam" height="20" width="20" />
+    </a>
+
+</p>
 
 ### Thanks for giving free proxies
 - https://www.sslproxies.org/
@@ -98,6 +131,8 @@ Website : https://narkhedesam.github.io/
 - https://free-proxy-list.net/
 - http://spys.me/proxy.txt
 - https://proxyscrape.com/
+- https://www.proxynova.com/proxy-server-list/
+- https://www.proxy-list.download/
 <br/><br/>
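
Taken together with the usage steps above (construct a Scrapper with a category name, call getProxies, read proxies, len, and category off the result), the new categories slot in as below. A minimal sketch, assuming the package is installed from PyPI, that data.proxies holds the Proxy objects defined in __init__.py (each carrying ip and port), and that the sites are reachable; the final request through httpbin.org is purely illustrative, and free proxies fail often, so treat the output as best-effort:

    import requests
    from Proxy_List_Scrapper import Scrapper

    # One of the categories added in this commit.
    scrapper = Scrapper(category='PROXYLIST_DOWNLOAD_HTTPS', print_err_trace=False)
    data = scrapper.getProxies()

    # The README documents proxies, len, and category on the response object.
    print(data.category, data.len)

    # Assumption: each entry is a Proxy object carrying ip and port.
    for proxy in data.proxies[:5]:
        print(f'{proxy.ip}:{proxy.port}')

    # Per the "People use proxies to" section: route one request through a
    # scraped proxy. Free proxies are unreliable, hence the short timeout.
    if data.proxies:
        address = f'http://{data.proxies[0].ip}:{data.proxies[0].port}'
        r = requests.get('https://httpbin.org/ip',
                         proxies={'http': address, 'https': address},
                         timeout=10)
        print(r.json())  # should show the proxy's IP, not yours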

Web_Scrapper/README.md

Lines changed: 23 additions & 4 deletions
@@ -34,11 +34,30 @@ get response from scrape.do api
 
 
 ## Author
-Sameer Narkhede <br/>
-Profile : https://github.com/narkhedesam <br/>
-Website : https://narkhedesam.github.io/
+<b>Sameer Narkhede</b> <br/>
+<p align="left">
+    <a href="https://github.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/github.svg" alt="https://github.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://narkhedesam.com/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/googlechrome.svg" alt="https://narkhedesam.com/" height="20" width="20" />
+    </a>
+    <a href="https://www.linkedin.com/in/sameer-narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/linkedin.svg" alt="https://www.linkedin.com/in/sameer-narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://www.facebook.com/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/facebook.svg" alt="https://www.facebook.com/narkhedesam" height="20" width="20" />
+    </a>
+    <a href="https://www.instagram.com/sam_narkhede/" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/instagram.svg" alt="https://www.instagram.com/sam_narkhede/" height="20" width="20" />
+    </a>
+    <a href="https://t.me/narkhedesam" target="blank">
+        <img align="center" src="https://cdn.jsdelivr.net/npm/simple-icons@3.0.1/icons/telegram.svg" alt="https://t.me/narkhedesam" height="20" width="20" />
+    </a>
 
-<h5>special thanks to Batuhan Özyön - https://github.com/bynf </h5>
+</p>
+
+<h5>special thanks to <b>Batuhan Özyön</b> - https://github.com/bynf </h5>
 
 
 ## Screenshot
Binary file (7.25 KB) not shown.

setup.py

Lines changed: 4 additions & 4 deletions
@@ -8,19 +8,19 @@
 
 setup(
     name='Proxy-List-Scrapper',
-    version='0.1.6',
+    version='0.2.0',
     packages=find_packages(),
-    url='',
+    url='https://pypi.org/project/Proxy-List-Scrapper/',
     license='MIT License',
     author='Sameer Narkhede',
     author_email='narkhedesam@gmail.com',
-    description='proxy list scrapper from various websites. they gives the free proxies for temporary use.',
+    description='Proxy list scrapper from various websites. They gives the free proxies for temporary use.',
     # other arguments omitted
     long_description=long_description,
     long_description_content_type='text/markdown',
     install_requires=[
         'requests',
-    ],
+    ],
     include_package_data=True,
 
 )
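
Once the 0.2.0 release is on PyPI (the new url points at the project page), the bumped version string is what packaging tools report. A quick check, assuming the release is installed:

    # Assumes: pip install -U Proxy-List-Scrapper
    from importlib.metadata import version  # Python 3.8+

    print(version('Proxy-List-Scrapper'))  # expected: 0.2.0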
