def user_has_opted_out(user):
    """Return True if the account has signalled it should not be backfilled.

    An account counts as opted out when any of the following holds:

    * its ``note`` (profile bio) is a string containing the marker ``nobot``
      (case-insensitive) that is not glued to a preceding word character.
      This covers plain ``nobot``, ``#nobot``, markup such as ``>nobot`` and
      hashtag links ending in ``/tags/nobot``;
    * it carries an ``indexable`` key whose value is falsy;
    * it carries a ``discoverable`` key whose value is falsy.

    A key that is absent is never treated as an opt-out, but a key that is
    present with ``None`` is (``not None`` is true).

    Args:
        user: account dictionary; presumably the decoded JSON of a
            Mastodon-style account object -- confirm against the caller.

    Returns:
        bool: True if the account opted out, False otherwise.
    """
    # Local import: the file's import block is not visible from this block.
    import re

    note = user.get('note')
    # The negative look-behind also succeeds at the start of the string, so a
    # bio that begins with the marker is caught. The previous literal checks
    # (' nobot' and '/tags/nobot') required a leading space or slash and
    # missed '#nobot', '>nobot' and a marker at position 0.
    if isinstance(note, str) and re.search(r'(?<!\w)nobot', note.lower()):
        return True

    # Both flags follow the same rule: present and falsy means opted out.
    for flag in ('indexable', 'discoverable'):
        if flag in user and not user[flag]:
            return True

    return False