Added normalization functionality + TF-IDF

This commit is contained in:
unknown 2022-05-26 01:05:26 -07:00
parent d80a977450
commit 95ba16cf2e

View File

@ -184,7 +184,7 @@ class Indexer():
for token, postings in current_save.items(): for token, postings in current_save.items():
for p in postings: for p in postings:
p.tfidf = p.tf * math.log(len(self.id)/len(postings)) p.tfidf = p.tf * math.log(len(self.id)/len(postings))
self.normalize[p.url] += p.tfidf self.normalize[p.url] += p.tfidf**2
def get_data(self): def get_data(self):
@ -261,8 +261,6 @@ class Indexer():
#getting important tokens #getting important tokens
def main(): def main():
indexer = Indexer(True,0) indexer = Indexer(True,0)
indexer.get_data() indexer.get_data()