Added commented-out robots check in extract_next_links
This commit is contained in:
parent
1fbcb81fae
commit
0e5af0a4c7
@ -82,7 +82,13 @@ def extract_next_links(url, resp):
|
|||||||
#skipping query with specific actions which mutate the websites and cause a trap
|
#skipping query with specific actions which mutate the websites and cause a trap
|
||||||
if "do=" in href_link:
|
if "do=" in href_link:
|
||||||
continue
|
continue
|
||||||
|
'''
|
||||||
|
# this is currently in is_valid but implemented in a different way; don't know which one would make more sense
|
||||||
|
# skip as not allowed
|
||||||
|
if not robots_ok(href_link):
|
||||||
|
continue
|
||||||
|
'''
|
||||||
|
|
||||||
tempFile.write(href_link + "\n")
|
tempFile.write(href_link + "\n")
|
||||||
#Adding to the boi wonder pages
|
#Adding to the boi wonder pages
|
||||||
pages.append(href_link)
|
pages.append(href_link)
|
||||||
@ -129,6 +135,7 @@ def is_valid(url):
|
|||||||
return False
|
return False
|
||||||
elif is_a_loop_trap(url):
|
elif is_a_loop_trap(url):
|
||||||
return False
|
return False
|
||||||
|
# maybe this should go in the next link?
|
||||||
elif not robots_ok(url):
|
elif not robots_ok(url):
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user