Added normalization functionality and tf-idf weighting
This commit is contained in:
parent
d80a977450
commit
95ba16cf2e
@ -184,7 +184,7 @@ class Indexer():
|
|||||||
for token, postings in current_save.items():
|
for token, postings in current_save.items():
|
||||||
for p in postings:
|
for p in postings:
|
||||||
p.tfidf = p.tf * math.log(len(self.id)/len(postings))
|
p.tfidf = p.tf * math.log(len(self.id)/len(postings))
|
||||||
self.normalize[p.url] += p.tfidf
|
self.normalize[p.url] += p.tfidf**2
|
||||||
|
|
||||||
|
|
||||||
def get_data(self):
|
def get_data(self):
|
||||||
@ -261,8 +261,6 @@ class Indexer():
|
|||||||
#getting important tokens
|
#getting important tokens
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
indexer = Indexer(True,0)
|
indexer = Indexer(True,0)
|
||||||
indexer.get_data()
|
indexer.get_data()
|
||||||
|
Loading…
Reference in New Issue
Block a user