27 lines
824 B
Python
27 lines
824 B
Python
from utils import get_logger
|
|
from crawler.frontier import Frontier
|
|
from crawler.worker import Worker
|
|
|
|
class Crawler(object):
    """Coordinate one frontier and a pool of workers.

    Both collaborators are created through injectable factories, so
    alternative implementations can be substituted without changing
    this class.
    """

    def __init__(self, config, restart, frontier_factory=Frontier, worker_factory=Worker):
        """Store the configuration and build the frontier.

        Args:
            config: Configuration object. This class reads
                ``config.threads_count`` and otherwise passes the object
                through to the frontier and worker factories.
            restart: Forwarded unchanged to ``frontier_factory``; its
                meaning is defined by the frontier implementation.
            frontier_factory: Callable invoked as
                ``frontier_factory(config, restart)``.
            worker_factory: Callable invoked later as
                ``worker_factory(worker_id, config, frontier)``.
        """
        # Workers are only created once start_async() runs; until then the
        # pool is empty, which makes join() a no-op.
        self.worker_factory = worker_factory
        self.workers = []

        self.config = config
        self.logger = get_logger("CRAWLER")
        self.frontier = frontier_factory(config, restart)

    def start_async(self):
        """Create ``config.threads_count`` workers and start each one.

        Returns right after calling ``start()`` on every worker; use
        ``join()`` to wait for them.
        """
        # Build the whole pool before starting anything, so no worker is
        # running while its siblings are still being constructed.
        pool = []
        for worker_id in range(self.config.threads_count):
            pool.append(
                self.worker_factory(worker_id, self.config, self.frontier))
        self.workers = pool

        for launched in self.workers:
            launched.start()

    def start(self):
        """Start all workers, then wait on them via ``join()``."""
        self.start_async()
        self.join()

    def join(self):
        """Call ``join()`` on every worker in the current pool, in order."""
        for running in self.workers:
            running.join()
|