From 4080d46541a16c9f27d05add0957b90d13b39180 Mon Sep 17 00:00:00 2001 From: Lacerum Date: Mon, 18 Apr 2022 18:04:11 -0700 Subject: [PATCH] added my todo for traps so far --- spacetime-crawler4py-master/scraper.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spacetime-crawler4py-master/scraper.py b/spacetime-crawler4py-master/scraper.py index cfa07d9..fdf6d60 100644 --- a/spacetime-crawler4py-master/scraper.py +++ b/spacetime-crawler4py-master/scraper.py @@ -138,6 +138,11 @@ def is_valid(url): # maybe this should go in the next link? elif not robots_ok(url): return False + # https://support.archive-it.org/hc/en-us/articles/208332963-Modify-crawl-scope-with-a-Regular-Expression + # add len check + # add another dir check + # add extra dir check + # add cal check else: return True