mirror of
https://github.com/nicholasr-itsulu/FediFetcher.git
synced 2026-05-30 23:41:26 +00:00
Merge pull request #161 from bentasker/instance_banlist
feat: add support for instance banlist
This commit is contained in:
commit
fe6ce1af43
3 changed files with 10 additions and 1 deletions
|
|
@ -151,6 +151,7 @@ Option | Required? | Notes |
|
|||
|:----------------------------------------------------|-----------|:------|
|
||||
|`access-token` | Yes | The access token. If using GitHub Actions, this needs to be provided as a Secret called `ACCESS_TOKEN`. If running as a cron job or a container, you can supply this option as an array, to [fetch posts for multiple users](https://blog.thms.uk/2023/04/muli-user-support-for-fedifetcher) on your instance. To set tokens for multiple users using environment variables, define multiple environment variables with the `FF_ACCESS_TOKEN` prefix, e.g. `FF_ACCESS_TOKEN_USER1=…` and `FF_ACCESS_TOKEN_USER2=…`|
|
||||
|`server`|Yes|The domain only of your Mastodon server (without `https://` prefix) e.g. `mstdn.thms.uk`. |
|
||||
|`instance-blocklist` | No | A comma-separated list of instance domains that FediFetcher should never attempt to connect to.
|
||||
|`home-timeline-length` | No | Provide to fetch remote replies to posts in the API-Key owner's home timeline. Determines how many posts we'll fetch replies for. Recommended value: `200`.
|
||||
| `max-bookmarks` | No | Provide to fetch remote replies to any posts you have bookmarked. Determines how many of your bookmarks you want to get replies to. Recommended value: `80`. Requires an access token with `read:bookmarks` scope.
|
||||
| `max-favourites` | No | Provide to fetch remote replies to any posts you have favourited. Determines how many of your favourites you want to get replies to. Recommended value: `40`. Requires an access token with `read:favourites` scope.
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ argparser.add_argument('--max-list-length', required=False, type=int, default=10
|
|||
argparser.add_argument('--max-list-accounts', required=False, type=int, default=10, help="Determines how many accounts we'll backfill for in each list. This will be ignored, unless you also provide `from-lists = 1`. Set to `0` if you only want to fetch replies in lists.")
|
||||
argparser.add_argument('--log-level', required=False, default="DEBUG", help="Severity of events to log (DEBUG|INFO|WARNING|ERROR|CRITICAL)")
|
||||
argparser.add_argument('--log-format', required=False, type=str, default="%(asctime)s: %(message)s",help="Specify the log format")
|
||||
argparser.add_argument('--instance-blocklist', required=False, type=str, default="",help="A comma-seperated array of instances that FediFetcher should never try to connect to")
|
||||
|
||||
def get_notification_users(server, access_token, known_users, max_age):
|
||||
since = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(hours=max_age)
|
||||
|
|
@ -1120,6 +1121,10 @@ def can_fetch(user_agent, url):
|
|||
parsed_uri = urlparse(url)
|
||||
robots_url = '{uri.scheme}://{uri.netloc}/robots.txt'.format(uri=parsed_uri)
|
||||
|
||||
if parsed_uri.netloc in INSTANCE_BLOCKLIST:
|
||||
# Never connect to these locations
|
||||
raise Exception(f"Connecting to {parsed_uri.netloc} is prohibited by the configured blocklist")
|
||||
|
||||
robotsTxt = get_robots_from_url(robots_url)
|
||||
if isinstance(robotsTxt, bool):
|
||||
return robotsTxt
|
||||
|
|
@ -1501,7 +1506,8 @@ if __name__ == "__main__":
|
|||
"on_done",
|
||||
"on_fail",
|
||||
"log_level",
|
||||
"log_format"
|
||||
"log_format",
|
||||
"instance_blocklist"
|
||||
]:
|
||||
value = int(value)
|
||||
setattr(arguments, envvar, value)
|
||||
|
|
@ -1572,6 +1578,7 @@ if __name__ == "__main__":
|
|||
SEEN_HOSTS_FILE = os.path.join(arguments.state_dir, "seen_hosts")
|
||||
RECENTLY_CHECKED_CONTEXTS_FILE = os.path.join(arguments.state_dir, 'recent_context')
|
||||
|
||||
INSTANCE_BLOCKLIST = [x.strip() for x in arguments.instance_blocklist.split(",")]
|
||||
ROBOTS_TXT = {}
|
||||
|
||||
seen_urls = OrderedSet([])
|
||||
|
|
|
|||
|
|
@ -1446,6 +1446,7 @@ def test_can_fetch(mock_robotFileParser, mock_get_robots_from_url):
|
|||
# Prepare mocks
|
||||
mock_robotsTxt = MagicMock()
|
||||
mock_robotParser = MagicMock()
|
||||
find_posts.INSTANCE_BLOCKLIST = []
|
||||
|
||||
# Mock return values
|
||||
mock_get_robots_from_url.return_value = mock_robotsTxt
|
||||
|
|
|
|||
Loading…
Reference in a new issue