Shorten the HTTP timeout for the robots.txt fetch

This commit is contained in:
nanos 2024-06-25 16:32:47 +01:00
parent dec718db76
commit 01a2719918

View file

@ -1017,7 +1017,7 @@ def can_fetch(user_agent, url):
else: else:
try: try:
# We are getting the robots.txt manually from here, because otherwise we can't change the User Agent # We are getting the robots.txt manually from here, because otherwise we can't change the User Agent
robotsTxt = get(robots, ignore_robots_txt=True) robotsTxt = get(robots, timeout = 2, ignore_robots_txt=True)
if robotsTxt.status_code in (401, 403): if robotsTxt.status_code in (401, 403):
ROBOTS_TXT[robots] = False ROBOTS_TXT[robots] = False
return False return False