Add files via upload

First Upload
This commit is contained in:
iNocturnis
2022-04-15 17:55:11 -07:00
committed by GitHub
commit e19f68a6a6
16 changed files with 689 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
from utils import get_logger
from crawler.frontier import Frontier
from crawler.worker import Worker
class Crawler(object):
    """Tie one frontier to a group of workers and drive their lifecycle.

    The frontier and the workers are created through injectable factories,
    so callers can substitute their own implementations for the defaults.
    """

    def __init__(self, config, restart, frontier_factory=Frontier, worker_factory=Worker):
        """Store the configuration and build the frontier.

        Args:
            config: Configuration object; ``threads_count`` is read from it
                when the workers are created.
            restart: Passed through unchanged to ``frontier_factory``.
            frontier_factory: Callable invoked as
                ``frontier_factory(config, restart)`` to build the frontier.
            worker_factory: Callable invoked as
                ``worker_factory(worker_id, config, frontier)`` for each
                worker; it is only stored here and used in ``start_async``.
        """
        self.config = config
        self.logger = get_logger("CRAWLER")
        self.frontier = frontier_factory(config, restart)
        # No workers exist until start_async() is called.
        self.workers = []
        self.worker_factory = worker_factory

    def start_async(self):
        """Create ``config.threads_count`` workers and start each of them.

        Returns without waiting for the workers; use ``join`` for that.
        Any previously stored worker list is replaced.
        """
        created = []
        for worker_id in range(self.config.threads_count):
            created.append(
                self.worker_factory(worker_id, self.config, self.frontier)
            )
        # Publish the list only once every worker has been constructed,
        # and only then begin starting them.
        self.workers = created
        for running in self.workers:
            running.start()

    def start(self):
        """Start all workers and then wait until every one has been joined."""
        self.start_async()
        self.join()

    def join(self):
        """Call ``join()`` on every worker in the current worker list."""
        for pending in self.workers:
            pending.join()