Quantcast
Channel: Active questions tagged amazon-ec2 - Stack Overflow
Viewing all articles
Browse latest Browse all 29243

How to solve 'RecursionError: maximum recursion depth exceeded' with Eventlet and Requests in Python

$
0
0

I am trying to implement the Amazon Web Scraper mentioned here. However, I get the output mentioned below. The output repeats until it stops with RecursionError: maximum recursion depth exceeded. I have already tried downgrading eventlet to version 0.17.4 as mentioned here. Also, the requests module is getting patched as you can see in helpers.py.

helpers.py

"""Shared scraping helpers: eventlet-patched HTTP fetching, URL cleanup,
proxy selection, and logging for the Amazon scraper."""

import os
import random
from datetime import datetime
from urllib.parse import urlparse

import eventlet
# Patch requests/time so their blocking I/O cooperates with eventlet greenlets.
requests = eventlet.import_patched('requests.__init__')
time = eventlet.import_patched('time')
import redis
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

import settings

# Global counter of HTTP requests issued by this process (capped by settings).
num_requests = 0

redis = redis.StrictRedis(host=settings.redis_host, port=settings.redis_port, db=settings.redis_db)


def make_request(url, return_soup=True):
    """Fetch *url* and return its content, or None on failure.

    Returns:
        (BeautifulSoup, str) tuple when return_soup is True,
        the raw Response object when return_soup is False,
        or None for redirect URLs, failed requests, and non-200 responses.

    Raises:
        Exception: once settings.max_requests requests have been issued.
    """
    # global request building and response handling
    url = format_url(url)
    if "picassoRedirect" in url:
        return None  # skip the redirect URLs
    global num_requests
    if num_requests >= settings.max_requests:
        raise Exception("Reached the max number of requests: {}".format(settings.max_requests))
    proxies = get_proxy()
    try:
        r = requests.get(url, headers=settings.headers, proxies=proxies)
    except RequestException as e:
        # BUG FIX: the original only logged here and then fell through to
        # r.status_code with `r` unbound, crashing with UnboundLocalError.
        # Report the failure and bail out instead.
        log("WARNING: Request for {} failed: {}".format(url, e))
        return None
    num_requests += 1
    if r.status_code != 200:
        os.system('say "Got non-200 Response"')
        log("WARNING: Got a {} status code for URL: {}".format(r.status_code, url))
        return None
    if return_soup:
        # Name the parser explicitly so bs4 doesn't pick one nondeterministically.
        return BeautifulSoup(r.text, "html.parser"), r.text
    return r


def format_url(url):
    # make sure URLs aren't relative, and strip unnecessary query args
    u = urlparse(url)
    scheme = u.scheme or "https"
    host = u.netloc or "www.amazon.de"
    path = u.path
    if not u.query:
        query = ""
    else:
        kept = []
        for piece in u.query.split("&"):
            # partition() splits on the first '=' only, so values containing
            # '=' and bare keys no longer raise ValueError as split("=") did.
            k, _, v = piece.partition("=")
            if k in settings.allowed_params:
                kept.append("{}={}".format(k, v))
        query = "?" + "&".join(kept) if kept else ""
    return "{}://{}{}{}".format(scheme, host, path, query)


def log(msg):
    # global logging function
    if settings.log_stdout:
        try:
            print("{}: {}".format(datetime.now(), msg))
        except UnicodeEncodeError:
            pass  # squash logging errors in case of non-ascii text


def get_proxy():
    # choose a proxy server to use for this request, if we need one
    if not settings.proxies or len(settings.proxies) == 0:
        return None
    proxy = random.choice(settings.proxies)
    proxy_url = "socks5://{user}:{passwd}@{ip}:{port}/".format(
        user=settings.proxy_user,
        passwd=settings.proxy_pass,
        ip=proxy,
        port=settings.proxy_port,
    )
    return {
        "http": proxy_url,
        "https": proxy_url,
    }


if __name__ == '__main__':
    # test proxy server IP masking
    r = make_request('https://api.ipify.org?format=json', return_soup=False)
    # make_request now returns None on failure; guard before dereferencing.
    if r is not None:
        print(r.text)

output

Traceback (most recent call last):  File "helpers.py", line 112, in <module>    r = make_request('https://api.ipify.org?format=json', return_soup=False)  File "helpers.py", line 36, in make_request    r = requests.get(url, headers=settings.headers, proxies=proxies)  File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 76, in get    return request('get', url, params=params, **kwargs)  File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 61, in request    return session.request(method=method, url=url, **kwargs)  File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 530, in request    resp = self.send(prep, **send_kwargs)  File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 643, in send    r = adapter.send(request, **kwargs)  File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/adapters.py", line 449, in send    timeout=timeout  File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen    chunked=chunked,  File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 376, in _make_request    self._validate_conn(conn)  File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 994, in _validate_conn    conn.connect()  File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connection.py", line 300, in connect    conn = self._new_conn()  File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/contrib/socks.py", line 99, in _new_conn    **extra_kw  File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 199, in create_connection    sock.connect((remote_host, remote_port))  File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 47, in wrapper    return function(*args, **kwargs)  File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 774, in connect    super(socksocket, 
self).settimeout(self._timeout)  File "/home/ec2-user/env/lib64/python3.7/site-packages/eventlet/greenio/base.py", line 395, in settimeout    self.setblocking(True)

What might be the problem here?


Viewing all articles
Browse latest Browse all 29243

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>