Merge pull request #164 from AndrewKvalheim/trivial

Remove unused code, correct spelling
This commit is contained in:
Michael 2024-09-03 16:16:11 +01:00 committed by GitHub
commit d6066f34c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from datetime import datetime, timedelta from datetime import datetime, timedelta
import string
from dateutil import parser from dateutil import parser
import itertools import itertools
import json import json
@ -53,7 +52,7 @@ argparser.add_argument('--max-list-length', required=False, type=int, default=10
argparser.add_argument('--max-list-accounts', required=False, type=int, default=10, help="Determines how many accounts we'll backfill for in each list. This will be ignored, unless you also provide `from-lists = 1`. Set to `0` if you only want to fetch replies in lists.") argparser.add_argument('--max-list-accounts', required=False, type=int, default=10, help="Determines how many accounts we'll backfill for in each list. This will be ignored, unless you also provide `from-lists = 1`. Set to `0` if you only want to fetch replies in lists.")
argparser.add_argument('--log-level', required=False, default="DEBUG", help="Severity of events to log (DEBUG|INFO|WARNING|ERROR|CRITICAL)") argparser.add_argument('--log-level', required=False, default="DEBUG", help="Severity of events to log (DEBUG|INFO|WARNING|ERROR|CRITICAL)")
argparser.add_argument('--log-format', required=False, type=str, default="%(asctime)s: %(message)s",help="Specify the log format") argparser.add_argument('--log-format', required=False, type=str, default="%(asctime)s: %(message)s",help="Specify the log format")
argparser.add_argument('--instance-blocklist', required=False, type=str, default="",help="A comma-seperated array of instances that FediFetcher should never try to connect to") argparser.add_argument('--instance-blocklist', required=False, type=str, default="",help="A comma-separated array of instances that FediFetcher should never try to connect to")
def get_notification_users(server, access_token, known_users, max_age): def get_notification_users(server, access_token, known_users, max_age):
since = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(hours=max_age) since = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(hours=max_age)
@ -100,7 +99,7 @@ def add_user_posts(server, access_token, followings, known_followings, all_known
failed += 1 failed += 1
logger.info(f"Added {count} posts for user {user['acct']} with {failed} errors") logger.info(f"Added {count} posts for user {user['acct']} with {failed} errors")
if failed == 0: if failed == 0:
known_followings.add(user['acct']) known_followings.add(user['acct'])
all_known_users.add(user['acct']) all_known_users.add(user['acct'])
def add_post_with_context(post, server, access_token, seen_urls, seen_hosts): def add_post_with_context(post, server, access_token, seen_urls, seen_hosts):
@ -115,7 +114,7 @@ def add_post_with_context(post, server, access_token, seen_urls, seen_hosts):
known_context_urls = get_all_known_context_urls(server, [post],parsed_urls, seen_hosts) known_context_urls = get_all_known_context_urls(server, [post],parsed_urls, seen_hosts)
add_context_urls(server, access_token, known_context_urls, seen_urls) add_context_urls(server, access_token, known_context_urls, seen_urls)
return True return True
return False return False
def user_has_opted_out(user): def user_has_opted_out(user):
@ -126,7 +125,7 @@ def user_has_opted_out(user):
if 'discoverable' in user and not user['discoverable']: if 'discoverable' in user and not user['discoverable']:
return True return True
return False return False
def get_user_posts(user, known_followings, server, seen_hosts): def get_user_posts(user, known_followings, server, seen_hosts):
if user_has_opted_out(user): if user_has_opted_out(user):
@ -138,7 +137,7 @@ def get_user_posts(user, known_followings, server, seen_hosts):
# We are adding it as 'known' anyway, because we won't be able to fix this. # We are adding it as 'known' anyway, because we won't be able to fix this.
known_followings.add(user['acct']) known_followings.add(user['acct'])
return None return None
if(parsed_url[0] == server): if(parsed_url[0] == server):
logger.debug(f"{user['acct']} is a local user. Skip") logger.debug(f"{user['acct']} is a local user. Skip")
known_followings.add(user['acct']) known_followings.add(user['acct'])
@ -157,7 +156,7 @@ def get_user_posts(user, known_followings, server, seen_hosts):
if post_server['misskeyApiSupport']: if post_server['misskeyApiSupport']:
return get_user_posts_misskey(parsed_url[1], post_server['webserver']) return get_user_posts_misskey(parsed_url[1], post_server['webserver'])
if post_server['peertubeApiSupport']: if post_server['peertubeApiSupport']:
return get_user_posts_peertube(parsed_url[1], post_server['webserver']) return get_user_posts_peertube(parsed_url[1], post_server['webserver'])
@ -219,11 +218,11 @@ def get_user_posts_lemmy(userName, userUrl, webserver):
for post in all_posts: for post in all_posts:
post['url'] = post['ap_id'] post['url'] = post['ap_id']
return all_posts return all_posts
except Exception as ex: except Exception as ex:
logger.error(f"Error getting user posts for user {userName}: {ex}") logger.error(f"Error getting user posts for user {userName}: {ex}")
return None return None
def get_user_posts_peertube(userName, webserver): def get_user_posts_peertube(userName, webserver):
try: try:
url = f'https://{webserver}/api/v1/accounts/{userName}/videos' url = f'https://{webserver}/api/v1/accounts/{userName}/videos'
@ -280,7 +279,7 @@ def get_user_posts_misskey(userName, webserver):
except Exception as ex: except Exception as ex:
logger.error(f"Error getting posts by user {userName} from {webserver}. Exception: {ex}") logger.error(f"Error getting posts by user {userName} from {webserver}. Exception: {ex}")
return None return None
def get_new_follow_requests(server, access_token, max, known_followings): def get_new_follow_requests(server, access_token, max, known_followings):
"""Get any new follow requests for the specified user, up to the max number provided""" """Get any new follow requests for the specified user, up to the max number provided"""
@ -289,11 +288,11 @@ def get_new_follow_requests(server, access_token, max, known_followings):
"Authorization": f"Bearer {access_token}", "Authorization": f"Bearer {access_token}",
}) })
# Remove any we already know about # Remove any we already know about
new_follow_requests = filter_known_users(follow_requests, known_followings) new_follow_requests = filter_known_users(follow_requests, known_followings)
logger.info(f"Got {len(follow_requests)} follow_requests, {len(new_follow_requests)} of which are new") logger.info(f"Got {len(follow_requests)} follow_requests, {len(new_follow_requests)} of which are new")
return new_follow_requests return new_follow_requests
def filter_known_users(users, known_users): def filter_known_users(users, known_users):
@ -306,24 +305,24 @@ def get_new_followers(server, user_id, max, known_followers):
"""Get any new followings for the specified user, up to the max number provided""" """Get any new followings for the specified user, up to the max number provided"""
followers = get_paginated_mastodon(f"https://{server}/api/v1/accounts/{user_id}/followers", max) followers = get_paginated_mastodon(f"https://{server}/api/v1/accounts/{user_id}/followers", max)
# Remove any we already know about # Remove any we already know about
new_followers = filter_known_users(followers, known_followers) new_followers = filter_known_users(followers, known_followers)
logger.info(f"Got {len(followers)} followers, {len(new_followers)} of which are new") logger.info(f"Got {len(followers)} followers, {len(new_followers)} of which are new")
return new_followers return new_followers
def get_new_followings(server, user_id, max, known_followings): def get_new_followings(server, user_id, max, known_followings):
"""Get any new followings for the specified user, up to the max number provided""" """Get any new followings for the specified user, up to the max number provided"""
following = get_paginated_mastodon(f"https://{server}/api/v1/accounts/{user_id}/following", max) following = get_paginated_mastodon(f"https://{server}/api/v1/accounts/{user_id}/following", max)
# Remove any we already know about # Remove any we already know about
new_followings = filter_known_users(following, known_followings) new_followings = filter_known_users(following, known_followings)
logger.info(f"Got {len(following)} followings, {len(new_followings)} of which are new") logger.info(f"Got {len(following)} followings, {len(new_followings)} of which are new")
return new_followings return new_followings
def get_user_id(server, user = None, access_token = None): def get_user_id(server, user = None, access_token = None):
"""Get the user id from the server, using a username""" """Get the user id from the server, using a username"""
@ -339,11 +338,11 @@ def get_user_id(server, user = None, access_token = None):
} }
else: else:
raise Exception('You must supply either a user name or an access token, to get an user ID') raise Exception('You must supply either a user name or an access token, to get an user ID')
response = get(url, headers=headers) response = get(url, headers=headers)
if response.status_code == 200: if response.status_code == 200:
return response.json()['id'] return response.json()['id']
elif response.status_code == 404: elif response.status_code == 404:
raise Exception( raise Exception(
f"User {user} was not found on server {server}." f"User {user} was not found on server {server}."
@ -359,7 +358,7 @@ def get_timeline(server, access_token, max):
url = f"https://{server}/api/v1/timelines/home" url = f"https://{server}/api/v1/timelines/home"
try: try:
response = get_toots(url, access_token) response = get_toots(url, access_token)
if response.status_code == 200: if response.status_code == 200:
@ -390,7 +389,7 @@ def get_timeline(server, access_token, max):
logger.info(f"Found {len(toots)} toots in timeline") logger.info(f"Found {len(toots)} toots in timeline")
return toots return toots
def get_toots(url, access_token): def get_toots(url, access_token):
response = get( url, headers={ response = get( url, headers={
"Authorization": f"Bearer {access_token}", "Authorization": f"Bearer {access_token}",
@ -412,7 +411,7 @@ def get_toots(url, access_token):
raise Exception( raise Exception(
f"Error getting URL {url}. Status code: {response.status_code}" f"Error getting URL {url}. Status code: {response.status_code}"
) )
def get_active_user_ids(server, access_token, reply_interval_hours): def get_active_user_ids(server, access_token, reply_interval_hours):
"""get all user IDs on the server that have posted a toot in the given """get all user IDs on the server that have posted a toot in the given
time interval""" time interval"""
@ -529,12 +528,12 @@ def toot_context_should_be_fetched(toot):
if(lastSeenInSeconds >= 60 * 60): if(lastSeenInSeconds >= 60 * 60):
# After that: hourly # After that: hourly
return True return True
return False return False
def get_all_known_context_urls(server, reply_toots, parsed_urls, seen_hosts): def get_all_known_context_urls(server, reply_toots, parsed_urls, seen_hosts):
"""get the context toots of the given toots from their original server""" """get the context toots of the given toots from their original server"""
known_context_urls = set() known_context_urls = set()
for toot in reply_toots: for toot in reply_toots:
if toot_has_parseable_url(toot, parsed_urls): if toot_has_parseable_url(toot, parsed_urls):
url = toot["url"] if toot["reblog"] is None else toot["reblog"]["url"] url = toot["url"] if toot["reblog"] is None else toot["reblog"]["url"]
@ -547,10 +546,10 @@ def get_all_known_context_urls(server, reply_toots, parsed_urls, seen_hosts):
known_context_urls.add(item) known_context_urls.add(item)
else: else:
logger.error(f"Error getting context for toot {url}") logger.error(f"Error getting context for toot {url}")
known_context_urls = set(filter(lambda url: not url.startswith(f"https://{server}/"), known_context_urls)) known_context_urls = set(filter(lambda url: not url.startswith(f"https://{server}/"), known_context_urls))
logger.info(f"Found {len(known_context_urls)} known context toots") logger.info(f"Found {len(known_context_urls)} known context toots")
return known_context_urls return known_context_urls
@ -559,7 +558,7 @@ def toot_has_parseable_url(toot,parsed_urls):
if(parsed is None) : if(parsed is None) :
return False return False
return True return True
def get_all_replied_toot_server_ids( def get_all_replied_toot_server_ids(
server, reply_toots, replied_toot_server_ids, parsed_urls server, reply_toots, replied_toot_server_ids, parsed_urls
@ -610,7 +609,7 @@ def parse_user_url(url):
match = parse_mastodon_profile_url(url) match = parse_mastodon_profile_url(url)
if match is not None: if match is not None:
return match return match
match = parse_pleroma_profile_url(url) match = parse_pleroma_profile_url(url)
if match is not None: if match is not None:
return match return match
@ -629,7 +628,7 @@ def parse_user_url(url):
return match return match
logger.error(f"Error parsing Profile URL {url}") logger.error(f"Error parsing Profile URL {url}")
return None return None
def parse_url(url, parsed_urls): def parse_url(url, parsed_urls):
@ -642,7 +641,7 @@ def parse_url(url, parsed_urls):
match = parse_mastodon_uri(url) match = parse_mastodon_uri(url)
if match is not None: if match is not None:
parsed_urls[url] = match parsed_urls[url] = match
if url not in parsed_urls: if url not in parsed_urls:
match = parse_pleroma_url(url) match = parse_pleroma_url(url)
if match is not None: if match is not None:
@ -671,7 +670,7 @@ def parse_url(url, parsed_urls):
if url not in parsed_urls: if url not in parsed_urls:
logger.error(f"Error parsing toot URL {url}") logger.error(f"Error parsing toot URL {url}")
parsed_urls[url] = None parsed_urls[url] = None
return parsed_urls[url] return parsed_urls[url]
def parse_mastodon_profile_url(url): def parse_mastodon_profile_url(url):
@ -709,7 +708,7 @@ def parse_pleroma_url(url):
url = get_redirect_url(url) url = get_redirect_url(url)
if url is None: if url is None:
return None return None
match = re.match(r"/notice/(?P<toot_id>[^/]+)", url) match = re.match(r"/notice/(?P<toot_id>[^/]+)", url)
if match is not None: if match is not None:
return (server, match.group("toot_id")) return (server, match.group("toot_id"))
@ -872,7 +871,7 @@ def get_lemmy_comment_context(webserver, toot_id, toot_url):
except Exception as ex: except Exception as ex:
logger.error(f"Error getting comment {toot_id} from {toot_url}. Exception: {ex}") logger.error(f"Error getting comment {toot_id} from {toot_url}. Exception: {ex}")
return [] return []
if resp.status_code == 200: if resp.status_code == 200:
try: try:
res = resp.json() res = resp.json()
@ -929,7 +928,7 @@ def get_peertube_urls(webserver, post_id, toot_url):
except Exception as ex: except Exception as ex:
logger.error(f"Error getting comments on video {post_id} from {toot_url}. Exception: {ex}") logger.error(f"Error getting comments on video {post_id} from {toot_url}. Exception: {ex}")
return [] return []
if resp.status_code == 200: if resp.status_code == 200:
return [comment['url'] for comment in resp.json()['data']] return [comment['url'] for comment in resp.json()['data']]
@ -1019,7 +1018,7 @@ def add_context_url(url, server, access_token):
f"Error adding url {search_url} to server {server}. Status code: {resp.status_code}" f"Error adding url {search_url} to server {server}. Status code: {resp.status_code}"
) )
return False return False
def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5): def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5):
"""Make a paginated request to mastodon""" """Make a paginated request to mastodon"""
if(isinstance(max, int)): if(isinstance(max, int)):
@ -1084,7 +1083,7 @@ def get_cached_robots(robots_url):
## firstly: check the in-memory cache ## firstly: check the in-memory cache
if robots_url in ROBOTS_TXT: if robots_url in ROBOTS_TXT:
return ROBOTS_TXT[robots_url] return ROBOTS_TXT[robots_url]
robotsCachePath = get_robots_txt_cache_path(robots_url) robotsCachePath = get_robots_txt_cache_path(robots_url)
if os.path.exists(robotsCachePath): if os.path.exists(robotsCachePath):
with open(robotsCachePath, "r", encoding="utf-8") as f: with open(robotsCachePath, "r", encoding="utf-8") as f:
@ -1092,14 +1091,14 @@ def get_cached_robots(robots_url):
robotsTxt = f.read() robotsTxt = f.read()
ROBOTS_TXT[robots_url] = robotsTxt ROBOTS_TXT[robots_url] = robotsTxt
return robotsTxt return robotsTxt
return None return None
def get_robots_from_url(robots_url): def get_robots_from_url(robots_url):
robotsTxt = get_cached_robots(robots_url) robotsTxt = get_cached_robots(robots_url)
if robotsTxt != None: if robotsTxt != None:
return robotsTxt return robotsTxt
try: try:
# We are getting the robots.txt manually from here, because otherwise we can't change the User Agent # We are getting the robots.txt manually from here, because otherwise we can't change the User Agent
robotsTxt = get(robots_url, timeout = 2, ignore_robots_txt=True) robotsTxt = get(robots_url, timeout = 2, ignore_robots_txt=True)
@ -1110,7 +1109,7 @@ def get_robots_from_url(robots_url):
with open(get_robots_txt_cache_path(robots_url), "w", encoding="utf-8") as f: with open(get_robots_txt_cache_path(robots_url), "w", encoding="utf-8") as f:
f.write(robotsTxt) f.write(robotsTxt)
except Exception as ex: except Exception:
robotsTxt = True robotsTxt = True
ROBOTS_TXT[robots_url] = robotsTxt ROBOTS_TXT[robots_url] = robotsTxt
@ -1128,7 +1127,7 @@ def can_fetch(user_agent, url):
robotsTxt = get_robots_from_url(robots_url) robotsTxt = get_robots_from_url(robots_url)
if isinstance(robotsTxt, bool): if isinstance(robotsTxt, bool):
return robotsTxt return robotsTxt
robotParser = urllib.robotparser.RobotFileParser() robotParser = urllib.robotparser.RobotFileParser()
robotParser.parse(robotsTxt.splitlines()) robotParser.parse(robotsTxt.splitlines())
return robotParser.can_fetch(user_agent, url) return robotParser.can_fetch(user_agent, url)
@ -1144,11 +1143,11 @@ def get(url, headers = {}, timeout = 0, max_tries = 5, ignore_robots_txt = False
h['User-Agent'] = user_agent() h['User-Agent'] = user_agent()
if not ignore_robots_txt and not can_fetch(h['User-Agent'], url): if not ignore_robots_txt and not can_fetch(h['User-Agent'], url):
raise Exception(f"Querying {url} prohibited by robots.txt") raise Exception(f"Querying {url} prohibited by robots.txt")
if timeout == 0: if timeout == 0:
timeout = arguments.http_timeout timeout = arguments.http_timeout
response = requests.get( url, headers= h, timeout=timeout) response = requests.get( url, headers= h, timeout=timeout)
if response.status_code == 429: if response.status_code == 429:
if max_tries > 0: if max_tries > 0:
@ -1158,7 +1157,7 @@ def get(url, headers = {}, timeout = 0, max_tries = 5, ignore_robots_txt = False
logger.warning(f"Rate Limit hit requesting {url}. Waiting {wait} sec to retry at {response.headers['x-ratelimit-reset']}") logger.warning(f"Rate Limit hit requesting {url}. Waiting {wait} sec to retry at {response.headers['x-ratelimit-reset']}")
time.sleep(wait) time.sleep(wait)
return get(url, headers, timeout, max_tries - 1) return get(url, headers, timeout, max_tries - 1)
raise Exception(f"Maximum number of retries exceeded for rate limited request {url}") raise Exception(f"Maximum number of retries exceeded for rate limited request {url}")
return response return response
@ -1169,8 +1168,8 @@ def post(url, json, headers = {}, timeout = 0, max_tries = 5):
h['User-Agent'] = user_agent() h['User-Agent'] = user_agent()
if not can_fetch(h['User-Agent'], url): if not can_fetch(h['User-Agent'], url):
raise Exception(f"Querying {url} prohibited by robots.txt") raise Exception(f"Querying {url} prohibited by robots.txt")
if timeout == 0: if timeout == 0:
timeout = arguments.http_timeout timeout = arguments.http_timeout
@ -1200,10 +1199,10 @@ class ServerList:
def get(self, key): def get(self, key):
return self._dict[key] return self._dict[key]
def pop(self,key): def pop(self,key):
return self._dict.pop(key) return self._dict.pop(key)
def __contains__(self, item): def __contains__(self, item):
return item in self._dict return item in self._dict
@ -1212,7 +1211,7 @@ class ServerList:
def __len__(self): def __len__(self):
return len(self._dict) return len(self._dict)
def toJSON(self): def toJSON(self):
return json.dumps(self._dict,default=str) return json.dumps(self._dict,default=str)
@ -1241,7 +1240,7 @@ class OrderedSet:
def pop(self, item): def pop(self, item):
self._dict.pop(item) self._dict.pop(item)
def get(self, item): def get(self, item):
return self._dict[item] return self._dict[item]
@ -1257,7 +1256,7 @@ class OrderedSet:
def __len__(self): def __len__(self):
return len(self._dict) return len(self._dict)
def toJSON(self): def toJSON(self):
return json.dumps(self._dict,default=str) return json.dumps(self._dict,default=str)
@ -1516,15 +1515,15 @@ if __name__ == "__main__":
if tokens := [token for envvar, token in os.environ.items() if envvar.lower().startswith("ff_access_token")]: if tokens := [token for envvar, token in os.environ.items() if envvar.lower().startswith("ff_access_token")]:
arguments.access_token = tokens arguments.access_token = tokens
logger.info(f"Starting FediFetcher") logger.info("Starting FediFetcher")
if(arguments.server == None or arguments.access_token == None): if(arguments.server == None or arguments.access_token == None):
logger.critical("You must supply at least a server name and an access token") logger.critical("You must supply at least a server name and an access token")
sys.exit(1) sys.exit(1)
# in case someone provided the server name as url instead, # in case someone provided the server name as url instead,
setattr(arguments, 'server', re.sub(r"^(https://)?([^/]*)/?$", "\\2", arguments.server)) setattr(arguments, 'server', re.sub(r"^(https://)?([^/]*)/?$", "\\2", arguments.server))
runId = uuid.uuid4() runId = uuid.uuid4()
@ -1545,9 +1544,9 @@ if __name__ == "__main__":
with open(LOCK_FILE, "r", encoding="utf-8") as f: with open(LOCK_FILE, "r", encoding="utf-8") as f:
lock_time = parser.parse(f.read()) lock_time = parser.parse(f.read())
if (datetime.now() - lock_time).total_seconds() >= arguments.lock_hours * 60 * 60: if (datetime.now() - lock_time).total_seconds() >= arguments.lock_hours * 60 * 60:
os.remove(LOCK_FILE) os.remove(LOCK_FILE)
logger.debug(f"Lock file has expired. Removed lock file.") logger.debug("Lock file has expired. Removed lock file.")
else: else:
logger.critical(f"Lock file age is {datetime.now() - lock_time} - below --lock-hours={arguments.lock_hours} provided.") logger.critical(f"Lock file age is {datetime.now() - lock_time} - below --lock-hours={arguments.lock_hours} provided.")
if(arguments.on_fail != None and arguments.on_fail != ''): if(arguments.on_fail != None and arguments.on_fail != ''):
@ -1558,7 +1557,7 @@ if __name__ == "__main__":
sys.exit(1) sys.exit(1)
except Exception: except Exception:
logger.critical(f"Cannot read logfile age - aborting.") logger.critical("Cannot read logfile age - aborting.")
if(arguments.on_fail != None and arguments.on_fail != ''): if(arguments.on_fail != None and arguments.on_fail != ''):
try: try:
get(f"{arguments.on_fail}?rid={runId}", ignore_robots_txt = True) get(f"{arguments.on_fail}?rid={runId}", ignore_robots_txt = True)
@ -1606,7 +1605,7 @@ if __name__ == "__main__":
lastCheck = recently_checked_users.get(user) lastCheck = recently_checked_users.get(user)
userAge = datetime.now(lastCheck.tzinfo) - lastCheck userAge = datetime.now(lastCheck.tzinfo) - lastCheck
if(userAge.total_seconds() > arguments.remember_users_for_hours * 60 * 60): if(userAge.total_seconds() > arguments.remember_users_for_hours * 60 * 60):
recently_checked_users.pop(user) recently_checked_users.pop(user)
recently_checked_context = {} recently_checked_context = {}
if(os.path.exists(RECENTLY_CHECKED_CONTEXTS_FILE)): if(os.path.exists(RECENTLY_CHECKED_CONTEXTS_FILE)):
@ -1621,7 +1620,7 @@ if __name__ == "__main__":
userAge = datetime.now(lastSeen.tzinfo) - lastSeen userAge = datetime.now(lastSeen.tzinfo) - lastSeen
# dont really need to keep track for more than 7 days: if we haven't seen it in 7 days we can refetch content anyway # dont really need to keep track for more than 7 days: if we haven't seen it in 7 days we can refetch content anyway
if(userAge.total_seconds() > 7 * 24 * 60 * 60): if(userAge.total_seconds() > 7 * 24 * 60 * 60):
recently_checked_context.pop(tootUrl) recently_checked_context.pop(tootUrl)
parsed_urls = {} parsed_urls = {}
@ -1652,7 +1651,7 @@ if __name__ == "__main__":
if os.path.getmtime(file_path) < time.time() - 60 * 60 * 24: if os.path.getmtime(file_path) < time.time() - 60 * 60 * 24:
logger.debug(f"Removing cached robots.txt file {file_name}") logger.debug(f"Removing cached robots.txt file {file_name}")
os.remove(file_path) os.remove(file_path)
if(isinstance(arguments.access_token, str)): if(isinstance(arguments.access_token, str)):
setattr(arguments, 'access_token', [arguments.access_token]) setattr(arguments, 'access_token', [arguments.access_token])
@ -1692,7 +1691,7 @@ if __name__ == "__main__":
if arguments.home_timeline_length > 0: if arguments.home_timeline_length > 0:
"""Do the same with any toots on the key owner's home timeline """ """Do the same with any toots on the key owner's home timeline """
logger.info(f"Getting context for home timeline") logger.info("Getting context for home timeline")
timeline_toots = get_timeline(arguments.server, token, arguments.home_timeline_length) timeline_toots = get_timeline(arguments.server, token, arguments.home_timeline_length)
fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users) fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users)
@ -1701,7 +1700,7 @@ if __name__ == "__main__":
user_id = get_user_id(arguments.server, arguments.user, token) user_id = get_user_id(arguments.server, arguments.user, token)
followings = get_new_followings(arguments.server, user_id, arguments.max_followings, all_known_users) followings = get_new_followings(arguments.server, user_id, arguments.max_followings, all_known_users)
add_user_posts(arguments.server, token, followings, known_followings, all_known_users, seen_urls, seen_hosts) add_user_posts(arguments.server, token, followings, known_followings, all_known_users, seen_urls, seen_hosts)
if arguments.max_followers > 0: if arguments.max_followers > 0:
logger.info(f"Getting posts from last {arguments.max_followers} followers") logger.info(f"Getting posts from last {arguments.max_followers} followers")
user_id = get_user_id(arguments.server, arguments.user, token) user_id = get_user_id(arguments.server, arguments.user, token)
@ -1758,7 +1757,7 @@ if __name__ == "__main__":
logger.info(f"Processing finished in {datetime.now() - start}.") logger.info(f"Processing finished in {datetime.now() - start}.")
except Exception as ex: except Exception:
os.remove(LOCK_FILE) os.remove(LOCK_FILE)
logger.error(f"Job failed after {datetime.now() - start}.") logger.error(f"Job failed after {datetime.now() - start}.")
if(arguments.on_fail != None and arguments.on_fail != ''): if(arguments.on_fail != None and arguments.on_fail != ''):