From 95ba16cf2ef891617520cc88878f38c1b971006b Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 26 May 2022 01:05:26 -0700 Subject: [PATCH] added normalizing functionality + tfidf --- indexer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/indexer.py b/indexer.py index e970c04..6a17b5f 100644 --- a/indexer.py +++ b/indexer.py @@ -184,7 +184,7 @@ class Indexer(): for token, postings in current_save.items(): for p in postings: p.tfidf = p.tf * math.log(len(self.id)/len(postings)) - self.normalize[p.url] += p.tfidf + self.normalize[p.url] += p.tfidf**2 def get_data(self): @@ -261,8 +261,6 @@ class Indexer(): #getting important tokens - - def main(): indexer = Indexer(True,0) indexer.get_data()