shorten http timeout for robots.txt fetch
This commit is contained in:
parent
dec718db76
commit
01a2719918
1 changed file with 1 addition and 1 deletion
|
|
@@ -1017,7 +1017,7 @@ def can_fetch(user_agent, url):
|
|||
else:
|
||||
try:
|
||||
# We are getting the robots.txt manually from here, because otherwise we can't change the User Agent
|
||||
robotsTxt = get(robots, ignore_robots_txt=True)
|
||||
robotsTxt = get(robots, timeout = 2, ignore_robots_txt=True)
|
||||
if robotsTxt.status_code in (401, 403):
|
||||
ROBOTS_TXT[robots] = False
|
||||
return False
|
||||
|
|
|
|||
Loading…
Reference in a new issue