Python 使用 socket 套接字和 ssl 模块下载 HTTPS 网页并保存为 txt 文件
下面的示例不依赖 requests 等第三方库,直接用标准库的 socket 与 ssl 建立 TLS 连接,手动发送 HTTP GET 请求,并把响应正文写入文本文件。
import socket
import ssl
def _is_chunked(header_blob):
    """Return True when the response headers declare a chunked body."""
    # The first line is the status line, so header fields start at index 1.
    for line in header_blob.split(b"\r\n")[1:]:
        name, _, value = line.partition(b":")
        if name.strip().lower() == b"transfer-encoding" and b"chunked" in value.lower():
            return True
    return False


def _decode_chunked(body):
    """Reassemble a ``Transfer-Encoding: chunked`` body into its raw payload."""
    pieces = []
    pos = 0
    while True:
        line_end = body.find(b"\r\n", pos)
        if line_end == -1:
            # Truncated stream: keep whatever complete chunks were received.
            break
        # A chunk-size line may carry extensions after ';' -- ignore them.
        size = int(body[pos:line_end].split(b";", 1)[0].strip(), 16)
        if size == 0:
            # Zero-length chunk terminates the body; trailers are discarded.
            break
        start = line_end + 2
        pieces.append(body[start:start + size])
        # Skip the chunk data plus the CRLF that follows it.
        pos = start + size + 2
    return b"".join(pieces)


def download_https_webpage(url, output_file, timeout=10.0):
    """Fetch a page over HTTPS with a plain socket and save its body to a file.

    The function opens a TCP connection, wraps it in TLS, sends a minimal
    ``GET`` request and writes the response body (headers stripped, chunked
    encoding decoded) to ``output_file`` in binary mode.

    Args:
        url: Address to fetch. An ``https://`` prefix is optional; a port,
            path and query string are honoured when present.
        output_file: Path of the file that receives the response body.
        timeout: Seconds to wait for the connection and for each socket
            operation before giving up.

    Returns:
        None. Success and failure are both reported on standard output; any
        exception is caught and printed rather than propagated.
    """
    from urllib.parse import urlsplit

    try:
        # Parse the URL. Scheme-less input such as "www.example.com/page" is
        # treated as HTTPS, which is what the hand-written parser used to do.
        target = url if "://" in url else "https://" + url
        parts = urlsplit(target)
        if parts.scheme != "https":
            raise ValueError(f"unsupported URL scheme: {parts.scheme!r}")
        host = parts.hostname
        if not host:
            raise ValueError(f"no host name found in URL: {url!r}")
        port = parts.port or 443
        path = parts.path or "/"
        if parts.query:
            path = f"{path}?{parts.query}"

        # Build the HTTP request. "Connection: close" lets us read until EOF;
        # "Accept-Encoding: identity" asks the server not to compress the body,
        # because this client cannot decompress it.
        host_header = host if port == 443 else f"{host}:{port}"
        request = (
            f"GET {path} HTTP/1.1\r\n"
            f"Host: {host_header}\r\n"
            "Accept-Encoding: identity\r\n"
            "Connection: close\r\n"
            "\r\n"
        )

        context = ssl.create_default_context()
        chunks = []
        # create_connection resolves the name (IPv4 or IPv6) and applies the
        # timeout; the with-blocks close the socket even when an error occurs.
        with socket.create_connection((host, port), timeout=timeout) as raw_socket:
            with context.wrap_socket(raw_socket, server_hostname=host) as client_socket:
                client_socket.sendall(request.encode())
                # Receive the response until the server closes the connection.
                while True:
                    data = client_socket.recv(4096)
                    if not data:
                        break
                    chunks.append(data)
        response = b"".join(chunks)

        # Separate the HTTP headers from the body.
        header_blob, separator, content = response.partition(b"\r\n\r\n")
        if not separator:
            raise ValueError("malformed HTTP response: header terminator not found")
        # HTTP/1.1 servers may frame the body in chunks; strip that framing so
        # the saved file holds only the page content.
        if _is_chunked(header_blob):
            content = _decode_chunked(content)

        # Save the body to the output file.
        with open(output_file, "wb") as file:
            file.write(content)
        print(f"网页内容已成功保存到 {output_file}")
    except Exception as e:
        print(f"发生错误: {e}")
if __name__ == "__main__":
    # Example run: the page to fetch (replace with the URL you want to
    # download) and the local file that receives the response body.
    url, output_file = "https://www.5a8.com", "www5a8com.txt"
    download_https_webpage(url, output_file)
运行结果
D:\code\python\get>python getsocketssl.py
网页内容已成功保存到 www5a8com.txt