Load balancer installed, havent not been able to test yet

This commit is contained in:
Hieuhuy Pham 2022-04-22 16:51:32 -07:00
parent 8b96a7c9f7
commit 90a5d16456
6 changed files with 358 additions and 52 deletions

View File

@ -31,3 +31,24 @@
2022-04-20 04:01:31,386 - FRONTIER - INFO - Found save file frontier.shelve, deleting it. 2022-04-20 04:01:31,386 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-20 04:02:16,043 - FRONTIER - INFO - Found save file frontier.shelve, deleting it. 2022-04-20 04:02:16,043 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-20 18:08:59,911 - FRONTIER - INFO - Found save file frontier.shelve, deleting it. 2022-04-20 18:08:59,911 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:31:49,310 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:32:31,178 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:32:35,094 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:33:25,233 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:33:42,393 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:36:07,413 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:37:56,413 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:38:45,000 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:39:14,157 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:39:50,638 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:39:56,516 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:41:07,005 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:46:01,865 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:46:16,984 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:49:37,689 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:53:43,854 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:54:45,134 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:56:48,517 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 12:57:19,541 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 13:02:40,174 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.
2022-04-22 13:07:26,611 - FRONTIER - INFO - Found save file frontier.shelve, deleting it.

View File

@ -42,3 +42,262 @@
2022-04-20 18:09:20,376 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/seminar-series-2020-2021, status <200>, using cache ('styx.ics.uci.edu', 9002). 2022-04-20 18:09:20,376 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/seminar-series-2020-2021, status <200>, using cache ('styx.ics.uci.edu', 9002).
2022-04-20 18:09:22,307 - Worker-0 - INFO - Downloaded http://www.stat.uci.edu/seminar-series/seminar-series-2015-2016, status <200>, using cache ('styx.ics.uci.edu', 9002). 2022-04-20 18:09:22,307 - Worker-0 - INFO - Downloaded http://www.stat.uci.edu/seminar-series/seminar-series-2015-2016, status <200>, using cache ('styx.ics.uci.edu', 9002).
2022-04-20 18:09:24,295 - Worker-0 - INFO - Downloaded http://www.stat.uci.edu/ICS/statistics/research/seminarseries/2011-2012/index, status <404>, using cache ('styx.ics.uci.edu', 9002). 2022-04-20 18:09:24,295 - Worker-0 - INFO - Downloaded http://www.stat.uci.edu/ICS/statistics/research/seminarseries/2011-2012/index, status <404>, using cache ('styx.ics.uci.edu', 9002).
2022-04-22 12:31:49,323 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:32:31,191 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:32:35,107 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:33:25,245 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:33:42,404 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:36:07,425 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:36:07,547 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:36:07,572 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:36:07,589 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:36:07,634 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:37:56,426 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:37:56,588 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:37:56,597 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:37:56,637 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:37:56,666 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:38:45,013 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:38:45,135 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:38:45,151 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:38:45,191 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:38:45,217 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:41:07,016 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:41:07,146 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:41:07,158 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:41:07,196 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:41:07,228 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:01,876 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:46:02,007 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:02,032 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:02,050 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:02,092 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:16,998 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:46:17,133 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:17,142 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:17,188 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:46:17,215 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:49:37,703 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:49:37,836 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:49:37,850 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:49:37,889 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:49:37,916 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9005).
2022-04-22 12:53:43,868 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:53:44,012 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:44,027 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:44,064 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:44,096 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:47,994 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu/multidisciplinary-collaborators-set-their-sights-on-color-vision-in-the-dark, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:50,268 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/ugrad/sao/SAO_Events.php, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:50,689 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/mds-student-ty-shao-aims-to-make-an-impact-in-healthcare, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:53:51,848 - Worker-2 - INFO - Downloaded https://www.stat.uci.edu/what-is-statistics, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:54:45,146 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:54:45,261 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:54:45,291 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:54:45,309 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:54:45,354 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:54:48,692 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/employment/employ_faculty, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:54:51,109 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/contact-the-department, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:56:48,530 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:56:48,650 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:56:48,665 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:56:48,700 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:56:48,730 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:56:51,960 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu/research-centers, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:56:52,710 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu/brochure-tiles/what-we-build, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:19,552 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 12:57:19,673 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:19,686 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:19,719 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:19,750 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:23,504 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu/support/collaborate-on-research, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:24,121 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu/grad/policies, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:24,641 - Worker-0 - INFO - Downloaded http://www.stat.uci.edu/news, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:24,847 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu/contact, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:25,355 - Worker-2 - INFO - Downloaded http://www.cs.uci.edu/professor-of-teaching-in-computer-science, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:25,862 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu/nlp-benchmarks-ml-devops-interoperability-with-yasaman-razeghi-and-prof-sameer-singh-machine-learning-street-talk-podcast, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:26,044 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/grad/policies/index.php, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:26,259 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/accessibility, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:26,685 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/computing/policy/ethics.php, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:26,806 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/grad, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:27,008 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/computing/policy/index.php, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:27,321 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/grad/policies, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:27,861 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/ugrad, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:29,432 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/news/page/3, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:29,838 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/feed.php, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:29,856 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/doku.php/security?idx=security, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:30,208 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news?id=2068, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:30,347 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2069, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:30,375 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/community, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:30,732 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2067, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:30,891 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/ugrad/policies, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:31,405 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/doku.php/backups?idx=backups, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:31,886 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/ugrad/index.php, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:31,921 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2068, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:32,406 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2070, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:32,431 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/doku.php/commands?idx=commands, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:32,943 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2071, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:32,990 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/doku.php/software?idx=software, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:33,492 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/doku.php/accounts:password_change_reset, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:34,000 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/doku.php/tag_search, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:34,504 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/doku.php/troubleshooting_and_triage_faq, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:35,009 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/doku.php/accounts:remote_desktop_connection, status <404>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:35,218 - Worker-1 - INFO - Downloaded https://create.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:35,494 - Worker-3 - INFO - Downloaded https://www.informatics.uci.edu/explore/faculty-profiles/constance-steinkuehler, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:35,521 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2066, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:35,901 - Worker-1 - INFO - Downloaded https://create.ics.uci.edu/people, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:36,006 - Worker-3 - INFO - Downloaded https://create.ics.uci.edu/contact, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:36,552 - Worker-3 - INFO - Downloaded https://create.ics.uci.edu/events, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:36,647 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2065, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:37,085 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2064, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:37,212 - Worker-2 - INFO - Downloaded https://create.ics.uci.edu/2021/04/06/atlas-kate-crawford, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:37,732 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/~aburtsev, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:38,329 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/lifetime-achievement-and-dinsdale-awards-double-the-accolades-for-professor-emerita-jessica-utts, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:39,721 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/from-theory-to-practice-ucis-machine-learning-hackathon-delivers, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:40,215 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2063, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:40,783 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/harnessing-the-data-revolution-ics-researchers-and-collaborators-receive-1-5m-data-science-grant, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:41,300 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/mine-dogucu-receives-young-investigator-award, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:43,540 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/~swjun, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:43,843 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2062, status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 12:57:44,800 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news?id=2063", status <200>, using cache ('styx.ics.uci.edu', 9003).
2022-04-22 13:02:40,187 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 13:02:40,350 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:40,361 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:40,402 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:44,480 - Worker-2 - INFO - Downloaded http://www.cs.uci.edu/distinguished-lecture-series, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:44,984 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu/graduate-computer-science-programs, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:46,039 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:47,007 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/faculty, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:50,811 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu/brochure-tiles/how-we-work, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:50,943 - Worker-2 - INFO - Downloaded http://mcs.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:51,215 - Worker-0 - INFO - Downloaded http://www.stat.uci.edu/faculty-directory/wesley-johnson, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:02:54,965 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/accessibility, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:26,624 - Worker-4 - INFO - Frontier is empty. Stopping Crawler.
2022-04-22 13:07:26,771 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:26,786 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:26,817 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:30,572 - Worker-2 - INFO - Downloaded https://www.cs.uci.edu/faculty, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:32,352 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:32,484 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/slider/uci-launches-new-professional-program-master-of-data-science, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:37,647 - Worker-1 - INFO - Downloaded https://www.informatics.uci.edu/support/set-future-agenda, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:37,747 - Worker-0 - INFO - Downloaded https://mds.ics.uci.edu/prospective-students/cost-and-financial-aid, status <404>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:37,845 - Worker-2 - INFO - Downloaded http://ngs.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:51,214 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/ugrad/policies/Add_Drop_ChangeOption, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:51,450 - Worker-1 - INFO - Downloaded http://www.informatics.uci.edu/menu-very-top/contact, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:51,679 - Worker-0 - INFO - Downloaded https://www.ics.uci.edu/about/search/search_graduate_all.php, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:51,883 - Worker-2 - INFO - Downloaded https://ngs.ics.uci.edu/research/research-papers, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:53,741 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/ugrad/policies, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:54,988 - Worker-1 - INFO - Downloaded https://ngs.ics.uci.edu/personal, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:55,068 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~anayk1, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:55,145 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/~rjmirand, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:55,224 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~yingtong, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:57,546 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~xinyuns, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:57,622 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~vyenugut, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:57,698 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/~jiapez2, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:57,774 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~chughm, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,054 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~radhiks3, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,132 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~seoyh1, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,208 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/~stanisll, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,284 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~shravanb, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,567 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~jandal, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,644 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~lucyp1, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,725 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/~srazavim, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:58,800 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~akuppili, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:59,082 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~youngik2, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:59,160 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~mbehzadi, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:59,237 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/~purua, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:59,315 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~junchez3, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:59,594 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~cangaoc, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:07:59,674 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~smcaleer, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:05,700 - Worker-2 - INFO - Downloaded http://www.ics.uci.edu/~lhe11, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:05,778 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~mheddes, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:05,859 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~ecvu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:06,321 - Worker-0 - INFO - Downloaded https://cml.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:07,086 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:08,185 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~unala, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:08,265 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~phucht1, status <403>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:08:08,366 - Worker-0 - INFO - Downloaded https://cml.ics.uci.edu/alumni, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:20,447 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminar-series, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:21,453 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/2022/02/21/aco-annual-distinguished-lecture-by-yinyu-ye, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:21,576 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/news, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:21,674 - Worker-0 - INFO - Downloaded http://www.ics.uci.edu/~sswamida, status <404>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:21,792 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/optimization-techniques-for-complex-energy-infrastructure-systems, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,012 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/2019/08/20/perspectives-on-the-design-of-approximation-algorithms, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,137 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/category/news, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,258 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/2022, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,497 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,665 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/2019/08, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,789 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/2019, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:23,912 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/2019/08/20, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:24,043 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/2022/02, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:24,164 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/2022/02/21, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:24,391 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/universal-laws-and-architectures-in-complex-networked-systems-with-applications-to-sensorimotor-control, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:24,653 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/transparency-and-control-in-platforms-networked-markets, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:24,778 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/sparse-polynomial-approximations-and-their-applications-to-quantum-advantage-parallel-computation-and-pseudorandomness, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:24,900 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/nonlinear-regression-via-convex-programming, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,023 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/building-optimization-beyond-minimization-a-journey-in-game-dynamics, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,161 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/fountain-codes-with-applications, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,307 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/no-signaling-proofs-their-applications-and-their-power, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,432 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/a-strongly-polynomial-algorithm-for-linear-exchange-markets, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,557 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/mean-estimation-in-low-and-high-dimensions, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,699 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/explicit-binary-tree-codes-with-polylogarithmic-size-alphabet, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:25,833 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/tight-multi-unit-prophet-inequalities-with-application-to-online-allocation, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:26,681 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/computational-phase-transition-and-mcmc-algorithms, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:26,824 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/capacity-of-neural-networks, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:26,949 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/496, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:27,072 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/tba-5, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:27,196 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/planar-graph-perfect-matching-is-in-nc, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:27,326 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/equilibrium-computation-and-machine-learning, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:27,450 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/the-edgeworth-conjecture-with-small-coalitions-and-approximate-equilibria-in-large-economies, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,156 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/stable-matching-voronoi-diagrams-combinatorial-complexity-and-algorithms, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,300 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/non-convex-optimization-and-structured-signal-recovery, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,436 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/the-maximum-diameter-of-oriented-matroids, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,560 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/parametric-learning-for-directed-graphical-models, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,684 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/tba-1, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,808 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/matching-is-as-easy-as-the-decision-problem-in-the-nc-model, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:28,931 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/solving-an-ill-posed-inverse-problem-of-three-dimensional-3d-vision, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,156 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/the-quarks-of-attention, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,277 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/dudeneys-no-three-in-line-problem, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,410 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/network-biform-games-an-application-to-the-implementation-of-electronic-waste-legislation, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,547 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/minimum-weight-combinatorial-structures-under-random-cost-constraints, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,673 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/efficient-certifiable-randomness, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,799 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/combinatorial-optimization-algorithms-for-clustering-and-machine-learning, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:29,945 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/tba, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,074 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/the-power-of-asking-more-informative-questions, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,201 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/statistical-estimation-with-strategic-data-holders, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,324 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/topological-network-alignment-comes-of-age, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,448 - Worker-3 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/mean-estimation-with-user-level-privacy-under-data-heterogeneity, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,576 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/fast-sampling-via-spectral-independence-beyond-bounded-degree-graphs, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,700 - Worker-0 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/formal-verification-of-edmonds-blossom-algorithm, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:30,923 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/seminars/classical-verification-of-quantum-computations, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:31,030 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/~dchudova, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:32,004 - Worker-1 - INFO - Downloaded http://www.ics.uci.edu/~sgaffney, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:34,094 - Worker-0 - INFO - Downloaded http://ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:34,247 - Worker-2 - INFO - Downloaded https://acoi.ics.uci.edu/contact, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:34,346 - Worker-3 - INFO - Downloaded http://www.ics.uci.edu/academics/graduate, status <404>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:34,464 - Worker-1 - INFO - Downloaded https://acoi.ics.uci.edu/people, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:45,500 - Worker-0 - INFO - Downloaded http://ics.uci.edu/ugrad/degrees/Minors, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:09:45,615 - Worker-2 - INFO - Downloaded http://ics.uci.edu/ugrad/degrees/index.php, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:00,103 - Worker-3 - INFO - Downloaded http://ics.uci.edu/ugrad/policies/Grade_Options, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:00,215 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/~babaks, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:06,923 - Worker-0 - INFO - Downloaded https://www.ics.uci.edu/ugrad/degrees, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:13,110 - Worker-2 - INFO - Downloaded http://ics.uci.edu/ugrad/policies, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:27,350 - Worker-3 - INFO - Downloaded http://ics.uci.edu/community/events/competition, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:29,391 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news?id=1595, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:37,390 - Worker-0 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news?id=2036, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:10:47,764 - Worker-2 - INFO - Downloaded http://ics.uci.edu/ugrad/policies/Course_Outside_UCI, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:02,010 - Worker-3 - INFO - Downloaded http://ics.uci.edu/grad/admissions_faq/index, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:08,810 - Worker-1 - INFO - Downloaded https://www.ics.uci.edu/community, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:08,903 - Worker-0 - INFO - Downloaded https://mdogucu.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:11,774 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2037, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:11,786 - Worker-0 - INFO - Downloaded https://www.stat.uci.edu/isi-buds, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:15,371 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/grad/admissions/comparison_chart_masters.php, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:16,248 - Worker-1 - INFO - Downloaded https://mt-live.ics.uci.edu/research-areas, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:33,979 - Worker-0 - INFO - Downloaded https://www.ics.uci.edu/grad/admissions_faq, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:34,119 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2038, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:35,270 - Worker-3 - INFO - Downloaded https://www.ics.uci.edu/~lopes, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:41,154 - Worker-1 - INFO - Downloaded https://mt-live.ics.uci.edu/faculty-staff-positions, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:41,884 - Worker-0 - INFO - Downloaded https://mt-live.ics.uci.edu/research-areas/computer-architecture, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:42,004 - Worker-2 - INFO - Downloaded https://www.ics.uci.edu/community/news/view_news.php?id=2039, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:45,064 - Worker-3 - INFO - Downloaded https://mswe.ics.uci.edu, status <200>, using cache ('styx.ics.uci.edu', 9004).
2022-04-22 13:11:47,846 - Worker-1 - INFO - Downloaded https://mt-live.ics.uci.edu/alumni/corporate-engagement, status <200>, using cache ('styx.ics.uci.edu', 9004).

View File

@ -16,5 +16,5 @@ POLITENESS = 0.5
SAVE = frontier.shelve SAVE = frontier.shelve
# IMPORTANT: DO NOT CHANGE IT IF YOU HAVE NOT IMPLEMENTED MULTITHREADING. # IMPORTANT: DO NOT CHANGE IT IF YOU HAVE NOT IMPLEMENTED MULTITHREADING.
THREADCOUNT = 1 THREADCOUNT = 5

View File

@ -15,21 +15,29 @@ from datacollection import *
#*.stat.uci.edu/* 3 #*.stat.uci.edu/* 3
#today.uci.edu/department/information_computer_sciences/* 4 #today.uci.edu/department/information_computer_sciences/* 4
#Semaphore for each domain to keep each domain noice and tidy with politeness
domain_semaphores = [Semaphore(3),Semaphore(3),Semaphore(3),Semaphore(3),Semaphore(3)]
data_mutex = Lock()
file_1_mutex = Lock()
file_2_mutex = Lock()
file_3_mutex = Lock()
file_4_mutex = Lock()
class Frontier(object): class Frontier(object):
def __init__(self, config, restart): def __init__(self, config, restart):
self.logger = get_logger("FRONTIER") self.logger = get_logger("FRONTIER")
self.config = config self.config = config
self.to_be_downloaded = list()
#Load balancer, list()
self.to_be_downloaded = [list(),list(),list(),list(),list()]
self.balance_index = 0
#Lock for each domain to keep each domain nice and tidy with politeness
self.domain_semaphores = [Lock(),Lock(),Lock(),Lock(),Lock()]
#Local data lock
self.data_mutex = Lock()
#File locks for data to make sure everything is thread-safe
self.file_1_mutex = Lock()
self.file_2_mutex = Lock()
self.file_3_mutex = Lock()
self.file_4_mutex = Lock()
# data collection is going to happen in the frontier # data collection is going to happen in the frontier
# uniques encompass overall unique links # uniques encompass overall unique links
self.uniques = set() self.uniques = set()
@ -63,13 +71,14 @@ class Frontier(object):
for url in self.config.seed_urls: for url in self.config.seed_urls:
self.add_url(url) self.add_url(url)
def _parse_save_file(self): def _parse_save_file(self):
''' This function can be overridden for alternate saving techniques. ''' ''' This function can be overridden for alternate saving techniques. '''
total_count = len(self.save) total_count = len(self.save)
tbd_count = 0 tbd_count = 0
for url, completed in self.save.values(): for url, completed in self.save.values():
if not completed and is_valid(url): if not completed and is_valid(url):
self.to_be_downloaded.append(url) self.to_be_downloaded[self.get_domain_index(url)].append(url)
tbd_count += 1 tbd_count += 1
self.logger.info( self.logger.info(
f"Found {tbd_count} urls to be downloaded from {total_count} " f"Found {tbd_count} urls to be downloaded from {total_count} "
@ -77,23 +86,24 @@ class Frontier(object):
def get_tbd_url(self):
    """Pop the next URL to download, rotating across the per-domain queues.

    Starting at ``self.balance_index``, the queues in
    ``self.to_be_downloaded`` are scanned in round-robin order and the
    first non-empty one supplies the URL. The index is then advanced past
    that queue so the next call starts with a different domain.

    Returns:
        The popped URL, or None when every queue is empty.
    """
    # `with` releases the lock on every exit path, including unexpected
    # exceptions; a manual acquire()/release() pair leaks the lock if
    # anything other than the anticipated IndexError is raised.
    with self.data_mutex:
        queue_count = len(self.to_be_downloaded)
        for offset in range(queue_count):
            # Wrap around so the index never runs off the end of the list.
            index = (self.balance_index + offset) % queue_count
            pending = self.to_be_downloaded[index]
            if pending:
                self.balance_index = (index + 1) % queue_count
                return pending.pop()
        return None
def add_url(self, url):
    """Register `url` in the save store and queue it for download if unseen.

    The URL is normalized and hashed; when the hash is already a key of
    ``self.save`` nothing happens. Otherwise it is recorded as not yet
    completed, the store is synced, and the URL is appended to the queue
    selected by ``self.get_domain_index(url)``.

    This method does not take ``data_mutex`` itself; the worker loop wraps
    its calls in acquire_data_mutex()/release_data_mutex().
    """
    url = normalize(url)
    urlhash = get_urlhash(url)
    if urlhash in self.save:
        # Already known: nothing to record or enqueue.
        return
    self.save[urlhash] = (url, False)
    self.save.sync()
    domain_queue = self.to_be_downloaded[self.get_domain_index(url)]
    domain_queue.append(url)
@ -101,27 +111,18 @@ class Frontier(object):
urlhash = get_urlhash(url) urlhash = get_urlhash(url)
##CRITICAL SECTION ##CRITICAL SECTION
data_mutex.acquire() self.data_mutex.acquire()
if urlhash not in self.save: if urlhash not in self.save:
# This should not happen. # This should not happen.
self.logger.error( self.logger.error(
f"Completed url {url}, but have not seen it before.") f"Completed url {url}, but have not seen it before.")
self.save[urlhash] = (url, True) self.save[urlhash] = (url, True)
self.save.sync() self.save.sync()
data_mutex.release() self.data_mutex.release()
##CRITICAL SECTION ##CRITICAL SECTION
def get_domain_index(self,url):
def acquire_polite(url):
return domain_semaphores[get_semaphore_index(url)].acquire()
def release_polite(domain):
return domain_semaphores[get_semaphore_index(url)].release()
def get_semaphore_index(url):
if "ics.uci.edu" in url: if "ics.uci.edu" in url:
return 0 return 0
elif "cs.uci.edu" in url: elif "cs.uci.edu" in url:
@ -135,6 +136,21 @@ class Frontier(object):
else: else:
println("ERROR") println("ERROR")
def acquire_polite(self, url):
    """Acquire the politeness lock of the domain bucket that `url` maps to.

    Returns whatever the lock's acquire() call returns.
    """
    bucket = self.get_domain_index(url)
    domain_lock = self.domain_semaphores[bucket]
    return domain_lock.acquire()
def release_polite(self, url):
    """Release the politeness lock of the domain bucket that `url` maps to.

    Returns whatever the lock's release() call returns.
    """
    bucket = self.get_domain_index(url)
    domain_lock = self.domain_semaphores[bucket]
    return domain_lock.release()
def acquire_data_mutex(self):
    """Acquire the frontier's data lock and return the result of acquire()."""
    lock = self.data_mutex
    return lock.acquire()
def release_data_mutex(self):
    """Release the frontier's data lock and return the result of release()."""
    lock = self.data_mutex
    return lock.release()
def q1(self, url): def q1(self, url):
# rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory # rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory
# this saves to the local directory, so I can constantly access the right file and check if it exists or not # this saves to the local directory, so I can constantly access the right file and check if it exists or not
@ -143,32 +159,39 @@ class Frontier(object):
# Will create a file of all the unique links and you can read the file and do lines = f.readlines() then len(lines) to get the number of unique links # Will create a file of all the unique links and you can read the file and do lines = f.readlines() then len(lines) to get the number of unique links
#Locking and releasing each file #Locking and releasing each file
file_1_mutex.acquire() self.file_1_mutex.acquire()
if (os.path.exists(my_filename)): if (os.path.exists(my_filename)):
f = open(my_filename, 'a') f = open(my_filename, 'a')
f.write(removeFragment(url)) f.write(str(removeFragment(url)))
f.close() f.close()
else: else:
f = open(my_filename, 'w') f = open(my_filename, 'w')
f.write(removeFragment(url)) f.write(str(removeFragment(url)))
f.close() f.close()
file_1_mutex.release() self.file_1_mutex.release()
def q234(self, url, resp): def q234(self, url, resp):
# rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory # rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory
# this saves to the local directory, so I can constantly access the right file and check if it exists or not # this saves to the local directory, so I can constantly access the right file and check if it exists or not
file_2_mutex.acquire()
if resp.status != 200:
return
self.file_2_mutex.acquire()
path_to_script = os.path.dirname(os.path.abspath(__file__)) path_to_script = os.path.dirname(os.path.abspath(__file__))
my_filename = os.path.join(path_to_script, "q2.txt") my_filename = os.path.join(path_to_script, "q2.txt")
tempTok = tokenize(resp) try:
if len(tempTok) > self.max: tempTok = tokenize(resp)
self.max = len(tempTok) if len(tempTok) > self.max:
self.longest = url self.max = len(tempTok)
f = open(my_filename, 'w') self.longest = url
f.write("Longest Page: {url}, length: {length}".format(url = self.longest, length = self.max)) f = open(my_filename, 'w')
f.close() f.write("Longest Page: {url}, length: {length}".format(url = self.longest, length = self.max))
file_2_mutex.release() f.close()
except:
print("resp dying for some reason ?")
self.file_2_mutex.release()
tempTok = removeStopWords(tempTok) tempTok = removeStopWords(tempTok)
computeFrequencies(tempTok, self.grand_dict) computeFrequencies(tempTok, self.grand_dict)
@ -178,7 +201,7 @@ class Frontier(object):
path_to_script = os.path.dirname(os.path.abspath(__file__)) path_to_script = os.path.dirname(os.path.abspath(__file__))
my_filename = os.path.join(path_to_script, "q3.txt") my_filename = os.path.join(path_to_script, "q3.txt")
file_3_mutex.acquire() self.file_3_mutex.acquire()
f = open(my_filename, "w") f = open(my_filename, "w")
sortedGrandDict = {k: v for k, v in sorted(self.grand_dict.items(), key=lambda item: item[1], reverse = True)} sortedGrandDict = {k: v for k, v in sorted(self.grand_dict.items(), key=lambda item: item[1], reverse = True)}
@ -191,7 +214,7 @@ class Frontier(object):
i += 1 i += 1
f.close() f.close()
file_3_mutex.release() self.file_3_mutex.release()
fragless = removeFragment(url) fragless = removeFragment(url)
domain = findDomains(fragless.netloc) domain = findDomains(fragless.netloc)
@ -208,11 +231,11 @@ class Frontier(object):
my_filename = os.path.join(path_to_script, "q4.txt") my_filename = os.path.join(path_to_script, "q4.txt")
# creating text file for question 4 # creating text file for question 4
file_4_mutex.acquire() self.file_4_mutex.acquire()
sortedDictKeys = sorted(self.ics.keys()) sortedDictKeys = sorted(self.ics.keys())
f = open(my_filename, "w") f = open(my_filename, "w")
for i in sortedDictKeys: for i in sortedDictKeys:
f.write("{url}, {num}".format(url = self.ics[i].getNiceLink(), num = len(self.ics[i].getUniques()))) f.write("{url}, {num}".format(url = self.ics[i].getNiceLink(), num = len(self.ics[i].getUniques())))
f.close() f.close()
file_4_mutex.release() self.file_4_mutex.release()

View File

@ -51,14 +51,20 @@ class Worker(Thread):
toc = time.perf_counter() toc = time.perf_counter()
print(f"Took {toc - tic:0.4f} seconds to do scrape url") print(f"Took {toc - tic:0.4f} seconds to do scrape url")
tic = time.perf_counter() tic = time.perf_counter()
self.frontier.acquire_data_mutex()
for scraped_url in scraped_urls: for scraped_url in scraped_urls:
self.frontier.add_url(scraped_url) self.frontier.add_url(scraped_url)
self.frontier.release_data_mutex()
toc = time.perf_counter()
print(f"Took {toc - tic:0.4f} seconds to do add_url stuffs")
tic = time.perf_counter()
self.frontier.mark_url_complete(tbd_url) self.frontier.mark_url_complete(tbd_url)
toc = time.perf_counter() toc = time.perf_counter()
print(f"Took {toc - tic:0.4f} seconds to do store stuffs") print(f"Took {toc - tic:0.4f} seconds to do store stuffs")
while start + self.config.time_delay > time.perf_counter(): while start + self.config.time_delay > time.perf_counter():
time.sleep(self.config.time_delay/5) time.sleep(self.config.time_delay/5)
self.frontier.release_polite(tbd_url) self.frontier.release_polite(tbd_url)

View File

@ -18,11 +18,8 @@ def scraper(url, resp):
for link in links: for link in links:
tic = time.perf_counter()
if is_valid(link): if is_valid(link):
links_valid.add(link) links_valid.add(link)
toc = time.perf_counter()
print(f"Took {toc - tic:0.4f} seconds to do validate url")
#valid_links.write(link + "\n") #valid_links.write(link + "\n")
else: else:
# invalid_links.write("From: " + url + "\n") # invalid_links.write("From: " + url + "\n")