import os
import shelve

from threading import Thread, Lock,Semaphore
from queue import Queue, Empty

import time

from utils import get_logger, get_urlhash, normalize
from scraper import is_valid
from datacollection import *


#*.ics.uci.edu/*                                                        0
#*.cs.uci.edu/*                                                         1
#*.informatics.uci.edu/*                                                2
#*.stat.uci.edu/*                                                       3
#today.uci.edu/department/information_computer_sciences/*               4



class Frontier(object):
    def __init__(self, config, restart):
        self.logger = get_logger("FRONTIER")
        self.config = config

        #Load balancer, list() 
        self.to_be_downloaded = [set(),set(),set(),set(),set()]

        self.balance_index = 0


        #Semaphore for each domain to keep each domain noice and tidy with politeness
        self.domain_semaphores = [Lock(),Lock(),Lock(),Lock(),Lock()]
        #Local data lock
        self.data_mutex = Lock()

        #FIle locks for data to make sure everything is thread-safe
        self.file_1_mutex = Lock()
        self.file_2_mutex = Lock()
        self.file_3_mutex = Lock()
        self.file_4_mutex = Lock()
        
       
        # data collection is going to happen in the frontier
        # uniques encompass overall unique links
        self.uniques = set() 
        # grand_dict encompasses all the words over the entire set of links
        self.grand_dict = dict()
        # ics dict contains all subdomains of ics
        self.ics = dict()
        # used to find the longest page
        self.max = -9999
        self.longest = None
        
        if not os.path.exists(self.config.save_file) and not restart:
            # Save file does not exist, but request to load save.
            self.logger.info(
                f"Did not find save file {self.config.save_file}, "
                f"starting from seed.")
        elif os.path.exists(self.config.save_file) and restart:
            # Save file does exists, but request to start from seed.
            self.logger.info(
                f"Found save file {self.config.save_file}, deleting it.")
            os.remove(self.config.save_file)
        # Load existing save file, or create one if it does not exist.
        self.save = shelve.open(self.config.save_file)
        if restart:
            for url in self.config.seed_urls:
                self.add_url(url)
        else:
            # Set the frontier state with contents of save file.
            self._parse_save_file()
            if not self.save:
                for url in self.config.seed_urls:
                    self.add_url(url)


    def _parse_save_file(self):
        ''' This function can be overridden for alternate saving techniques. '''
        total_count = len(self.save)
        tbd_count = 0
        for url, completed in self.save.values():
            if not completed and is_valid(url):
                self.to_be_downloaded[self.get_domain_index(url)].add(url)
                tbd_count += 1
        self.logger.info(
            f"Found {tbd_count} urls to be downloaded from {total_count} "
            f"total urls discovered.")

    def get_tbd_url(self):
        ###CRITICAL SECTION
        self.data_mutex.acquire()
        try:
            #Load balancing
            loop = 10
            while not self.to_be_downloaded[self.balance_index] and loop != 0:
                self.balance_index = self.balance_index + 1
                if self.balance_index > 4:
                    self.balance_index = 0
                loop = loop - 1
                if loop == 0:
                    self.data_mutex.release()
                    return None
            hold = self.to_be_downloaded[self.balance_index].pop()
            self.balance_index = self.balance_index + 1
            self.data_mutex.release()
            #print(hold)
            return hold

        except IndexError:
            print("POPPING RANDOM SHIT BRO")
            self.data_mutex.release()
            return None

    def add_url(self, url):
        url = normalize(url)
        urlhash = get_urlhash(url)
        ##CRITICAL SECTION
        if urlhash not in self.save:
            self.save[urlhash] = (url, False)
            self.save.sync()
            self.to_be_downloaded[self.get_domain_index(url)].add(url)
        ###CRITICAL SECTION
        

    def mark_url_complete(self, url):
        urlhash = get_urlhash(url)

        ##CRITICAL SECTION
        if urlhash not in self.save:
            # This should not happen.
            self.logger.error(
                f"Completed url {url}, but have not seen it before.")
        self.save[urlhash] = (url, True)
        self.save.sync()
        ##CRITICAL SECTION
    

    def get_domain_index(self,url):
        #yeah if you put ics.uci.edu in first it will add all informatics link into that instead
        if "informatics.uci.edu" in url:
            return 0
        elif "ics.uci.edu" in url: 
            return 1
        elif "cs.uci.edu" in url:
            return 2
        elif "stat.uci.edu" in url:
            return 3
        elif "today.uci.edu/department/information_computer_sciences/" in url:
            return 4
        else:
            print(url)
            print("ERROR")

        
      
    def acquire_polite(self,url):
        return self.domain_semaphores[self.get_domain_index(url)].acquire()

    def release_polite(self,url):
        return self.domain_semaphores[self.get_domain_index(url)].release()

    def acquire_data_mutex(self):
        return self.data_mutex.acquire()

    def release_data_mutex(self):
        return self.data_mutex.release()

    def acquire_234_mutex(self):
        return self.file_2_3_4_mutex.acquire()

    def release_234_mutex(self):
        return self.file_2_3_4_mutex.release()
    

    def q1(self, url):
        # rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory
        #       this saves to the local directory, so I can constantly access the right file and check if it exists or not
        path_to_script = os.path.dirname(os.path.abspath(__file__))
        my_filename = os.path.join(path_to_script, "q1.txt")
        
        # Will create a file of all the unique links and you can read the file and do lines = f.readlines() then len(lines) to get the number of unique links
        #Locking and releasing each file
        self.file_1_mutex.acquire()
        if (os.path.exists(my_filename)):
            f = open(my_filename, 'a')
            f.write(str(removeFragment(url)) + "\n")
            f.close()
        else:
            f = open(my_filename, 'w')
            f.write(str(removeFragment(url)) + "\n")
            f.close()
        self.file_1_mutex.release()

    def q234(self, url, resp):
        # rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory
        #       this saves to the local directory, so I can constantly access the right file and check if it exists or not
        
        if resp.status != 200:
            return

        tic = time.perf_counter()
        path_to_script = os.path.dirname(os.path.abspath(__file__))
        my_filename = os.path.join(path_to_script, "q2.txt")
        
        try:
            tempTok = tokenize(resp)
            self.file_2_mutex.acquire()
            if len(tempTok) > self.max:
                self.max = len(tempTok)
                self.longest = url
                f = open(my_filename, 'w')
                f.write("Longest Page: {url}, length: {length}".format(url = self.longest, length = self.max))
                f.close()
        except:
            print("resp dying for some reason ?")
        
        self.file_2_mutex.release()

        toc = time.perf_counter()
        #print(f"Took {toc - tic:0.4f} seconds to save file 2 !")

        tic = time.perf_counter()
        tempTok = removeStopWords(tempTok)
        self.file_3_mutex.acquire()
        computeFrequencies(tempTok, self.grand_dict)
        # rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory
        #       this saves to the local directory, so I can constantly access the right file and check if it exists or not
        path_to_script = os.path.dirname(os.path.abspath(__file__))
        my_filename = os.path.join(path_to_script, "q3.txt")


        f = open(my_filename, "w")
        sortedGrandDict = {k: v for k, v in sorted(self.grand_dict.items(), key=lambda item: item[1], reverse = True)}
        i = 0
        for k, v in sortedGrandDict.items():
            if i == 50:
                break
            else:
                f.write("{}: {}\n".format(k, v))
                i += 1
        f.close()
        self.file_3_mutex.release()

        toc = time.perf_counter()
        #print(f"Took {toc - tic:0.4f} seconds to save file 3 !")

        tic = time.perf_counter()

        fragless = removeFragment(url)
        domain = findDomains(fragless.netloc)
        self.file_4_mutex.acquire()
        if domain[1] == 'ics':
            if domain[0] not in self.ics:
                self.ics[domain[0]] = urlData(url, domain[0], domain[1])
            else:
                if fragless not in self.ics[domain[0]].getUniques():
                    self.ics[domain[0]].appendUnique(fragless)
        
        # rakslice (8 May 2013) Stackoverflow. https://stackoverflow.com/questions/16430258/creating-a-python-file-in-a-local-directory
        #       this saves to the local directory, so I can constantly access the right file and check if it exists or not
        path_to_script = os.path.dirname(os.path.abspath(__file__))
        my_filename = os.path.join(path_to_script, "q4.txt")

        # creating text file for question 4
        sortedDictKeys = sorted(self.ics.keys())
        f = open(my_filename, "w")
        for i in sortedDictKeys:
            f.write("{url}, {num} + \n".format(url = self.ics[i].getNiceLink(), num = len(self.ics[i].getUniques())))
        f.close()
        self.file_4_mutex.release()

        toc = time.perf_counter()
        #print(f"Took {toc - tic:0.4f} seconds to save file 4 !")