Search_Engine/search.py
2022-05-13 16:42:33 -07:00

64 lines
1.4 KiB
Python

#Data input
import json
import os
import shelve
from bs4 import BeautifulSoup
from time import perf_counter
import time
import threading
# Data processing
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import numpy as np
import re
#Logging postings
from posting import Posting
from worker import Worker
class Search():
def __init__(self):
    """Open the five shelve files and bind them to save_1 .. save_5.

    Each file is opened with ``shelve.open`` using its default arguments
    and stored on the instance under the matching attribute name.
    """
    # (attribute name, shelve filename) pairs, opened in this order.
    shelf_files = (
        ("save_1", "save_1.shelve"),
        ("save_2", "save_2.shelve"),
        ("save_3", "save_3.shelve"),
        ("save_4", "save_4.shelve"),
        ("save_5", "save_5.shelve"),
    )
    for attr_name, filename in shelf_files:
        setattr(self, attr_name, shelve.open(filename))
def get_save_file(self, word):
    """Return the shelf whose character range covers the start of *word*.

    The word is lowercased and tested against each pattern below in order;
    the shelf attribute paired with the first matching pattern is returned.
    A word matching none of the patterns (for example one starting with
    punctuation, or the empty string) falls through to ``self.save_5``.
    """
    lowered = word.lower()
    # Ordered (pattern, attribute name) pairs; the first match wins.
    routing = (
        (r"^[a-d0-1].*", "save_1"),
        (r"^[e-k2-3].*", "save_2"),
        (r"^[l-q4-7].*", "save_3"),
        (r"^[r-z8-9].*", "save_4"),
    )
    for pattern, attr_name in routing:
        if re.match(pattern, lowered):
            # Look the shelf up only once its pattern has matched.
            return getattr(self, attr_name)
    return self.save_5
def get_userinput():
    """Stub: takes no arguments, does nothing, and returns ``None``.

    Presumably intended to collect the user's query, but no such logic is
    implemented here yet.
    """
    # NOTE(review): declared without ``self`` although it sits among the
    # Search methods -- confirm whether this is meant to be a method.
    return None
def get_tf_idf(self, words):
try:
tfidf = TfidfVectorizer(ngram_range=(1,3))
def search(query):
x = [query]
file = self.get_save_file()