Add files via upload
First Upload
This commit is contained in:
35
spacetime-crawler4py-master/utils/__init__.py
Normal file
35
spacetime-crawler4py-master/utils/__init__.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
import logging
|
||||
from hashlib import sha256
|
||||
from urllib.parse import urlparse
|
||||
|
||||
def get_logger(name, filename=None):
    """Return the logger called *name*, writing to a log file and the console.

    The logger level is set to INFO. Two handlers are attached, both using
    the "time - name - level - message" format:

    * a file handler (level DEBUG) writing to ``Logs/<filename or name>.log``;
      the ``Logs`` directory is created in the current working directory
      if it does not exist yet;
    * a console stream handler (level INFO).

    ``logging.getLogger`` returns the same logger object for the same name,
    so handlers are only attached when an equivalent one is not already
    present. Calling this function repeatedly with the same arguments
    therefore does not duplicate log output or leak open file handles.

    Args:
        name: Name of the logger; also the log file stem when *filename*
            is not given.
        filename: Optional log file stem (without the ``.log`` suffix).

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    # exist_ok avoids the check-then-create race when several workers
    # start at the same time.
    os.makedirs("Logs", exist_ok=True)

    # FileHandler stores its target as an absolute path in baseFilename,
    # so compare against the absolute form.
    log_path = os.path.abspath(f"Logs/{filename if filename else name}.log")

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    has_file_handler = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == log_path
        for handler in logger.handlers)
    if not has_file_handler:
        fh = logging.FileHandler(log_path)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    # FileHandler subclasses StreamHandler, so exclude it when looking for
    # an existing console handler.
    has_console_handler = any(
        isinstance(handler, logging.StreamHandler)
        and not isinstance(handler, logging.FileHandler)
        for handler in logger.handlers)
    if not has_console_handler:
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    return logger
|
||||
|
||||
|
||||
def get_urlhash(url):
    """Return a SHA-256 hex digest identifying *url* regardless of scheme.

    The URL is split with ``urlparse``; the network location, path, params,
    query and fragment are joined with "/" and hashed as UTF-8. The scheme
    is left out, so ``http://`` and ``https://`` variants of the same
    address produce the same digest.
    """
    # ParseResult unpacks as (scheme, netloc, path, params, query, fragment).
    _scheme, netloc, path, params, query, fragment = urlparse(url)
    key = "{}/{}/{}/{}/{}".format(netloc, path, params, query, fragment)
    digest = sha256(key.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
def normalize(url):
    """Return *url* with every trailing "/" character removed.

    A URL that does not end in "/" is returned unchanged.
    """
    # rstrip is a no-op when there is no trailing slash, so no explicit
    # endswith() check is needed.
    return url.rstrip("/")
|
||||
Reference in New Issue
Block a user