Merge pull request #157 from nanos/use-xxhash
Try to use xxHash to hash robots cache file names
This commit is contained in:
commit
058643bb6d
2 changed files with 3 additions and 2 deletions
|
|
@@ -16,7 +16,7 @@ import uuid
|
||||||
import defusedxml.ElementTree as ET
|
import defusedxml.ElementTree as ET
|
||||||
import urllib.robotparser
|
import urllib.robotparser
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import hashlib
|
import xxhash
|
||||||
|
|
||||||
logger = logging.getLogger("FediFetcher")
|
logger = logging.getLogger("FediFetcher")
|
||||||
robotParser = urllib.robotparser.RobotFileParser()
|
robotParser = urllib.robotparser.RobotFileParser()
|
||||||
|
|
@@ -1076,7 +1076,7 @@ def get_paginated_mastodon(url, max, headers = {}, timeout = 0, max_tries = 5):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_robots_txt_cache_path(robots_url):
|
def get_robots_txt_cache_path(robots_url):
|
||||||
hash = hashlib.sha256(robots_url.encode('utf-8'))
|
hash = xxhash.xxh128(robots_url.encode('utf-8'))
|
||||||
return os.path.join(arguments.state_dir, f'robots-{hash.hexdigest()}.txt')
|
return os.path.join(arguments.state_dir, f'robots-{hash.hexdigest()}.txt')
|
||||||
|
|
||||||
def get_cached_robots(robots_url):
|
def get_cached_robots(robots_url):
|
||||||
|
|
|
||||||
|
|
@@ -12,3 +12,4 @@ requests==2.32.0
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
smmap==5.0.0
|
smmap==5.0.0
|
||||||
urllib3==1.26.19
|
urllib3==1.26.19
|
||||||
|
xxhash==3.4.1
|
||||||
Loading…
Reference in a new issue