66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
import asyncio
|
|
import aiohttp
|
|
from aiohttp_socks import ProxyConnector, ProxyType
|
|
import os
|
|
import urllib.parse
|
|
import time
|
|
|
|
|
|
async def download_file(session, url, filename, semaphore):
    """Download ``url`` into ``filename``, making up to three attempts.

    The whole operation, including the existence check and any waits between
    attempts, runs while holding ``semaphore``.

    Args:
        session: Object with an aiohttp-style ``get(url, timeout=...)`` that
            returns an async context manager yielding a response exposing
            ``status`` and an awaitable ``read()``.
        url: Address to fetch.
        filename: Destination path. If it already exists nothing is fetched.
        semaphore: Async context manager used to limit concurrency.

    Returns:
        True if ``filename`` already existed or was written successfully,
        False once every attempt has failed.
    """
    max_attempts = 3
    retry_delay = 2  # seconds to wait between attempts

    # The body is written to a sibling file and renamed into place only once
    # it is complete. Writing straight to ``filename`` would let an
    # interrupted run leave a truncated file that the existence check below
    # treats as already downloaded.
    partial_path = f"{filename}.part"

    async with semaphore:
        if os.path.exists(filename):
            print(f"Skipping {filename}")
            return True

        for attempt in range(1, max_attempts + 1):
            try:
                async with session.get(url, timeout=30) as response:
                    if response.status == 200:
                        content = await response.read()
                        with open(partial_path, "wb") as f:
                            f.write(content)
                        os.replace(partial_path, filename)
                        print(f"Successfully downloaded {filename}")
                        return True
                    print(f"HTTP Error {response.status} for {url}")
            except Exception as e:
                # Deliberately broad: any failure (network, timeout, disk)
                # is reported and counts as one failed attempt.
                print(f"Error for {url}: {e}")
                try:
                    os.remove(partial_path)
                except OSError:
                    # Nothing was written, or it cannot be removed; the next
                    # attempt reopens the path with "wb" and overwrites it.
                    pass

            # Only wait when another attempt will follow; sleeping after the
            # final failure just delays the caller.
            if attempt < max_attempts:
                await asyncio.sleep(retry_delay)

        print(f"Failed all retries for {url}")
        return False
|
|
|
|
|
|
async def main():
    """Download every URL listed in ``missing_urls.txt`` into ``html_files/``.

    Reads one URL per line (blank lines are ignored), derives a file name
    from the last path segment of the percent-decoded URL, and fetches all of
    them concurrently through ``download_file`` using a single aiohttp
    session routed via the local SOCKS5 proxy.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("html_files", exist_ok=True)

    with open("missing_urls.txt", "r", encoding="utf-8") as f:
        urls = [line.strip() for line in f if line.strip()]

    # All traffic goes through the SOCKS5 proxy at 127.0.0.1:10808.
    # aiohttp_socks takes a socks5:// URL rather than socks5h://; per the
    # original author's note, rdns=True is meant to give the equivalent
    # behaviour (confirm against the aiohttp_socks docs if this matters).
    proxy_url = "socks5://127.0.0.1:10808"
    connector = ProxyConnector.from_url(proxy_url, rdns=True)

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

    # download_file holds this for its whole run, so at most 10 are active.
    semaphore = asyncio.Semaphore(10)

    async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
        tasks = []
        scheduled = set()
        for url in urls:
            decoded_url = urllib.parse.unquote(url)
            # Strip trailing slashes first: otherwise the last segment is
            # empty and every such URL collapses onto "html_files/.html".
            volume_name = decoded_url.rstrip("/").split("/")[-1]
            filename = f"html_files/{volume_name}.html"
            # Two tasks targeting the same file would both pass the
            # existence check and download it twice.
            if filename in scheduled:
                continue
            scheduled.add(filename)
            tasks.append(download_file(session, url, filename, semaphore))

        results = await asyncio.gather(*tasks)

    # download_file returns False after exhausting its attempts.
    failed = sum(1 for ok in results if not ok)
    if failed:
        print(f"{failed} of {len(results)} downloads failed")
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the event loop and run the downloader only when this file is
    # executed as a script, not when it is imported.
    asyncio.run(main())
|