Merge pull request #157 from nanos/use-xxhash

Try to use xxHash to hash robots cache file names
This commit is contained in:
Michael 2024-09-02 17:06:38 +01:00 committed by GitHub
commit 058643bb6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 3 additions and 2 deletions

View file

@@ -16,7 +16,7 @@ import uuid
import defusedxml.ElementTree as ET
import urllib.robotparser
from urllib.parse import urlparse
import hashlib
import xxhash
logger = logging.getLogger("FediFetcher")
robotParser = urllib.robotparser.RobotFileParser()
@@ -1076,7 +1076,7 @@ def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5):
return result
def get_robots_txt_cache_path(robots_url):
    """Return the cache file path used for the given robots.txt URL.

    The file name is ``robots-<digest>.txt``, where ``<digest>`` is the
    hexadecimal xxh128 digest of the UTF-8 encoded ``robots_url``. The file
    is located inside ``arguments.state_dir``.
    """
    # Only the xxHash digest is used for the file name; the earlier SHA-256
    # computation was overwritten before use and has been dropped.
    url_digest = xxhash.xxh128(robots_url.encode('utf-8')).hexdigest()
    return os.path.join(arguments.state_dir, f'robots-{url_digest}.txt')
def get_cached_robots(robots_url):

View file

@@ -12,3 +12,4 @@ requests==2.32.0
six==1.16.0
smmap==5.0.0
urllib3==1.26.19
xxhash==3.4.1