webcrawler/spacetime-crawler4py-master/crawler/__init__.py
iNocturnis e19f68a6a6
Add files via upload
First Upload
2022-04-15 17:55:11 -07:00

27 lines
824 B
Python

from utils import get_logger
from crawler.frontier import Frontier
from crawler.worker import Worker
class Crawler(object):
    """Own a frontier and a pool of workers, and drive their lifecycle.

    The frontier and the workers are produced by injectable factories, so
    callers can substitute their own implementations for the defaults.
    """

    def __init__(self, config, restart, frontier_factory=Frontier, worker_factory=Worker):
        """Store the configuration and build the frontier.

        Args:
            config: Configuration object; ``threads_count`` is read from it
                when the workers are created in ``start_async``.
            restart: Forwarded unchanged to ``frontier_factory``.
            frontier_factory: Callable invoked as
                ``frontier_factory(config, restart)``.
            worker_factory: Callable invoked as
                ``worker_factory(worker_id, config, frontier)``.
        """
        self.config = config
        self.worker_factory = worker_factory
        # No workers exist until start_async() is called.
        self.workers = []
        self.logger = get_logger("CRAWLER")
        self.frontier = frontier_factory(config, restart)

    def start_async(self):
        """Create one worker per configured thread and start them all.

        Returns right after the last ``start()`` call; use ``join`` to wait
        for the workers.
        """
        # Build the complete pool first so that no worker is started until
        # every worker has been constructed.
        spawned = []
        for worker_id in range(self.config.threads_count):
            spawned.append(
                self.worker_factory(worker_id, self.config, self.frontier))
        self.workers = spawned

        for running in spawned:
            running.start()

    def start(self):
        """Start all workers, then wait until every one of them is joined."""
        self.start_async()
        self.join()

    def join(self):
        """Call ``join()`` on each worker, in creation order."""
        for pending in self.workers:
            pending.join()