mirror of
https://github.com/nicholasr-itsulu/FediFetcher.git
synced 2026-05-30 23:41:26 +00:00
Try to use xxHash to hash robots cache file names
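This should be faster and more efficient than the SHA-256 hash used previously; the hash is only used to name robots cache files, so it does not need to be cryptographic.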
This commit is contained in:
parent
df22348eb1
commit
939e775c27
2 changed files with 3 additions and 2 deletions
|
|
@ -16,7 +16,7 @@ import uuid
|
|||
import defusedxml.ElementTree as ET
|
||||
import urllib.robotparser
|
||||
from urllib.parse import urlparse
|
||||
import hashlib
|
||||
import xxhash
|
||||
|
||||
logger = logging.getLogger("FediFetcher")
|
||||
robotParser = urllib.robotparser.RobotFileParser()
|
||||
|
|
@ -1076,7 +1076,7 @@ def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5):
|
|||
return result
|
||||
|
||||
def get_robots_txt_cache_path(robots_url):
|
||||
hash = hashlib.sha256(robots_url.encode('utf-8'))
|
||||
hash = xxhash.xxh128(robots_url.encode('utf-8'))
|
||||
return os.path.join(arguments.state_dir, f'robots-{hash.hexdigest()}.txt')
|
||||
|
||||
def get_cached_robots(robots_url):
|
||||
|
|
|
|||
|
|
@ -12,3 +12,4 @@ requests==2.32.0
|
|||
six==1.16.0
|
||||
smmap==5.0.0
|
||||
urllib3==1.26.19
|
||||
xxhash==3.4.1
|
||||
Loading…
Reference in a new issue