Add files via upload
First Upload
This commit is contained in:
36
spacetime-crawler4py-master/utils/download.py
Normal file
36
spacetime-crawler4py-master/utils/download.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import requests
|
||||
import cbor
|
||||
import time
|
||||
|
||||
from utils.response import Response
|
||||
|
||||
def download(url, config, logger=None):
    """Fetch *url* through the cache server and wrap the reply in a Response.

    A HEAD request is sent first; if the reply declares a Content-Length
    above the size cap, the body is never requested and a synthetic error
    Response (status 606) is returned instead. Otherwise a GET request is
    sent and its CBOR-encoded body is decoded into a Response.

    Args:
        url: The URL to fetch; passed to the cache server as the ``q``
            query parameter.
        config: Object providing ``cache_server`` (unpacked as
            ``(host, port)``) and ``user_agent`` (sent as the ``u`` query
            parameter).
        logger: Optional logger. When omitted (``None``), nothing is logged.

    Returns:
        A Response built from the decoded CBOR payload on success, or from
        an error dict with ``error``, ``status`` and ``url`` keys when the
        declared size is too large or the reply is empty, unsuccessful or
        cannot be decoded.
    """
    # Size cap rationale (from the original author): an average web page
    # with all images and scripts is about 2 MB, so roughly 1 MB is taken
    # as the upper limit for the HTML markup alone.
    max_content_length = 1000000

    host, port = config.cache_server
    endpoint = f"http://{host}:{port}/"
    params = [("q", f"{url}"), ("u", f"{config.user_agent}")]

    # Check the headers first so oversized bodies are never downloaded.
    resp = requests.head(endpoint, params=params)

    # The header may be absent; .get() avoids a KeyError in that case and
    # simply skips the size check.
    declared_length = resp.headers.get("content-length")
    if declared_length and declared_length.isdigit():
        if int(declared_length) > max_content_length:
            if logger is not None:
                logger.info(
                    f"Skipping {url}: declared content-length "
                    f"{declared_length} exceeds {max_content_length}.")
            return Response({
                "error": "FILE TOO LARGE !",
                "status": 606,
                "url": url})

    resp = requests.get(endpoint, params=params)
    try:
        # A requests response is falsy for unsuccessful status codes, so
        # those fall through to the error Response below.
        if resp and resp.content:
            return Response(cbor.loads(resp.content))
    except (EOFError, ValueError):
        # Undecodable payload: deliberately fall through and report it as
        # an error Response rather than raising.
        pass

    message = f"Spacetime Response error {resp} with url {url}."
    # logger defaults to None, so guard the call to avoid AttributeError.
    if logger is not None:
        logger.error(message)
    return Response({
        "error": message,
        "status": resp.status_code,
        "url": url})
|
||||
Reference in New Issue
Block a user