Change more syntax to get data collection working; check extract_url and store sorted links in sets instead of lists to significantly reduce redundant URL extractions

This commit is contained in:
Hieuhuy Pham
2022-04-20 04:03:58 -07:00
parent d0dde4a4db
commit 58d15918d5
4 changed files with 19 additions and 9 deletions

View File

@@ -111,17 +111,17 @@ class Frontier(object):
f = open("q1.txt", "w")
f.write("Number of unique pages: {length}\n".format(length = len(uniques)))
f.write("Number of unique pages: {length}\n".format(length = len(self.uniques)))
f.close()
# creating text file for question 2
f = open("q2.txt", "w")
f.write("Largest page url: {url} \nLength of page: {length}".format(url = longest, length = max))
f.write("Largest page url: {url} \nLength of page: {length}".format(url = self.longest, length = self.max))
f.close()
# creating text file for question 3
f = open("q3.txt", "w")
sortedGrandDict = {k: v for k, v in sorted(grand_dict.items(), key=lambda item: item[1], reverse = True)}
sortedGrandDict = {k: v for k, v in sorted(self.grand_dict.items(), key=lambda item: item[1], reverse = True)}
i = 0
for k, v in sortedGrandDict.items():
if i == 50:
@@ -132,10 +132,10 @@ class Frontier(object):
f.close()
# creating text file for question 4
sortedDictKeys = sorted(ics.keys())
sortedDictKeys = sorted(self.ics.keys())
f = open("q4.txt", "w")
for i in sortedDictKeys:
f.write("{url}, {num}".format(url = ics[i].getNiceLink(), num = len(ics[i].getUniques())))
f.write("{url}, {num}".format(url = self.ics[i].getNiceLink(), num = len(self.ics[i].getUniques())))
f.close()