From 01a2719918693c831c7fa9a5b25b0d00ff4c0ba0 Mon Sep 17 00:00:00 2001 From: nanos Date: Tue, 25 Jun 2024 16:32:47 +0100 Subject: [PATCH] shorten http timeout for robots.txt fetch --- find_posts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/find_posts.py b/find_posts.py index 61879de..a77ad51 100644 --- a/find_posts.py +++ b/find_posts.py @@ -1017,7 +1017,7 @@ def can_fetch(user_agent, url): else: try: # We are getting the robots.txt manually from here, because otherwise we can't change the User Agent - robotsTxt = get(robots, ignore_robots_txt=True) + robotsTxt = get(robots, timeout = 2, ignore_robots_txt=True) if robotsTxt.status_code in (401, 403): ROBOTS_TXT[robots] = False return False