backfill mentioned users in list timelines

This commit is contained in:
nanos 2024-07-02 07:34:15 +01:00
parent d863b58513
commit f4873e7c8e
2 changed files with 42 additions and 25 deletions

View file

@ -1381,6 +1381,30 @@ def get_list_users(server, list, token, max):
logger.info(f"Found {len(accounts)} accounts in list {list['title']}") logger.info(f"Found {len(accounts)} accounts in list {list['title']}")
return accounts return accounts
def fetch_timeline_context(timeline_posts, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users):
    """Fetch context for a timeline's posts and optionally backfill its users.

    The known context URLs for ``timeline_posts`` are collected with
    ``get_all_known_context_urls`` and handed to ``add_context_urls``.

    If ``arguments.backfill_mentioned_users`` is greater than zero, the
    authors and mentioned users of the posts (and of any reblogged posts) are
    then gathered and passed, after ``filter_known_users``, to
    ``add_user_posts``.

    Gathering is throttled by the number of users already collected:
    posts are always considered while fewer than 10 users have been gathered,
    only posts newer than 60 minutes are considered between 10 and 30 users,
    and nothing further is considered once 30 users have been gathered.
    A single post may still push the total past these thresholds, because the
    check happens once per post rather than once per user.

    Args:
        timeline_posts: Iterable of status dicts; each is read for the keys
            ``created_at``, ``account``, ``mentions`` and ``reblog``.
        token: Access token forwarded to the server-facing helpers.
        parsed_urls: Forwarded unchanged to ``get_all_known_context_urls``.
        seen_hosts: Forwarded unchanged to the helpers.
        seen_urls: Forwarded unchanged to the helpers.
        all_known_users: Container tested for membership of ``user['acct']``;
            users already in it are not gathered again.
        recently_checked_users: Forwarded unchanged to ``add_user_posts``.
    """
    known_context_urls = get_all_known_context_urls(arguments.server, timeline_posts, parsed_urls, seen_hosts)
    add_context_urls(arguments.server, token, known_context_urls, seen_urls)

    # Backfill any post authors, and any mentioned users
    if arguments.backfill_mentioned_users <= 0:
        return

    always_take = 10        # below this many users, posts of any age qualify
    recent_take = 30        # at or above this many users, no post qualifies
    recent_minutes = 60     # age limit applied between the two thresholds

    # Timezone-aware "now" in the local zone, so it can be compared with the
    # parsed ``created_at`` timestamps.
    cut_off = datetime.now().astimezone() - timedelta(minutes=recent_minutes)

    mentioned_users = []
    for toot in timeline_posts:
        gathered = len(mentioned_users)
        if gathered >= recent_take:
            # The count never shrinks, so no later post can contribute.
            break
        # The timestamp only matters once the unconditional quota is used up.
        if gathered >= always_take and parser.parse(toot['created_at']) <= cut_off:
            continue

        these_users = [toot['account']]
        these_users += toot['mentions']
        reblog = toot['reblog']
        if reblog is not None:
            these_users.append(reblog['account'])
            these_users += reblog['mentions']

        for user in these_users:
            if user not in mentioned_users and user['acct'] not in all_known_users:
                mentioned_users.append(user)

    add_user_posts(arguments.server, token, filter_known_users(mentioned_users, all_known_users), recently_checked_users, all_known_users, seen_urls, seen_hosts)
if __name__ == "__main__": if __name__ == "__main__":
start = datetime.now() start = datetime.now()
@ -1551,12 +1575,12 @@ if __name__ == "__main__":
if arguments.from_lists: if arguments.from_lists:
"""Pull replies from lists""" """Pull replies from lists"""
lists = get_user_lists(arguments.server, token) lists = get_user_lists(arguments.server, token)
logger.info(f"Getting context for {len(lists)} lists")
for user_list in lists: for user_list in lists:
# Fill context from list # Fill context from list
if arguments.max_list_length > 0: if arguments.max_list_length > 0:
timeline_toots = get_list_timeline(arguments.server, user_list, token, arguments.max_list_length) timeline_toots = get_list_timeline(arguments.server, user_list, token, arguments.max_list_length)
known_context_urls = get_all_known_context_urls(arguments.server, timeline_toots,parsed_urls, seen_hosts) fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users)
add_context_urls(arguments.server, token, known_context_urls, seen_urls)
# Backfill profiles from list # Backfill profiles from list
if arguments.max_list_accounts: if arguments.max_list_accounts:
@ -1581,30 +1605,9 @@ if __name__ == "__main__":
if arguments.home_timeline_length > 0: if arguments.home_timeline_length > 0:
"""Do the same with any toots on the key owner's home timeline """ """Do the same with any toots on the key owner's home timeline """
logger.info(f"Getting context for home timeline")
timeline_toots = get_timeline(arguments.server, token, arguments.home_timeline_length) timeline_toots = get_timeline(arguments.server, token, arguments.home_timeline_length)
known_context_urls = get_all_known_context_urls(arguments.server, timeline_toots,parsed_urls, seen_hosts) fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users)
add_context_urls(arguments.server, token, known_context_urls, seen_urls)
# Backfill any post authors, and any mentioned users
if arguments.backfill_mentioned_users > 0:
mentioned_users = []
cut_off = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(minutes=60)
for toot in timeline_toots:
these_users = []
toot_created_at = parser.parse(toot['created_at'])
if len(mentioned_users) < 10 or (toot_created_at > cut_off and len(mentioned_users) < 30):
these_users.append(toot['account'])
if(len(toot['mentions'])):
these_users += toot['mentions']
if(toot['reblog'] != None):
these_users.append(toot['reblog']['account'])
if(len(toot['reblog']['mentions'])):
these_users += toot['reblog']['mentions']
for user in these_users:
if user not in mentioned_users and user['acct'] not in all_known_users:
mentioned_users.append(user)
add_user_posts(arguments.server, token, filter_known_users(mentioned_users, all_known_users), recently_checked_users, all_known_users, seen_urls, seen_hosts)
if arguments.max_followings > 0: if arguments.max_followings > 0:
logger.info(f"Getting posts from last {arguments.max_followings} followings") logger.info(f"Getting posts from last {arguments.max_followings} followings")

14
uniq Normal file
View file

@ -0,0 +1,14 @@
Error getting context for toot https://bsd.network/@lattera/112695266248144937. Exception: Querying https://bsd.network/api/v1/statuses/112695266248144937/context prohibited by robots.txt
Error getting context for toot https://glitch.social/@wilbr/112708074029292084. Exception: Querying https://glitch.social/api/v1/statuses/112708074029292084/context prohibited by robots.txt
Error getting context for toot https://mastodon.bida.im/@redhotcyber/112693534053156334. Exception: Querying https://mastodon.bida.im/api/v1/statuses/112693534053156334/context prohibited by robots.txt
Error getting context for toot https://toot.cafe/@aardrian/112695640079712832. Exception: Querying https://toot.cafe/api/v1/statuses/112695640079712832/context prohibited by robots.txt
Error getting host node info for flipboard.com. Exception: Querying https://flipboard.com/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for fsebugoutzone.org. Exception: Querying https://fsebugoutzone.org/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for glitterkitten.co.uk. Exception: Querying https://glitterkitten.co.uk/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for kitty.town. Exception: Querying https://kitty.town/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for mastodon.bentasker.co.uk. Exception: Querying https://mastodon.bentasker.co.uk/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for puddle.town. Exception: Querying https://puddle.town/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for shitposter.world. Exception: Querying https://shitposter.world/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for threads.net. Exception: Querying https://threads.net/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for www.threads.net. Exception: Querying https://www.threads.net/.well-known/nodeinfo prohibited by robots.txt
Error getting user ID for user aardrian: Querying https://toot.cafe/api/v1/accounts/lookup?acct=aardrian prohibited by robots.txt