shorten http timeout for robots.txt fetch
This commit is contained in:
parent
dec718db76
commit
01a2719918
1 changed file with 1 addition and 1 deletion
|
|
@@ -1017,7 +1017,7 @@ def can_fetch(user_agent, url):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
# We are getting the robots.txt manually from here, because otherwise we can't change the User Agent
|
# We are getting the robots.txt manually from here, because otherwise we can't change the User Agent
|
||||||
robotsTxt = get(robots, ignore_robots_txt=True)
|
robotsTxt = get(robots, timeout = 2, ignore_robots_txt=True)
|
||||||
if robotsTxt.status_code in (401, 403):
|
if robotsTxt.status_code in (401, 403):
|
||||||
ROBOTS_TXT[robots] = False
|
ROBOTS_TXT[robots] = False
|
||||||
return False
|
return False
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue