python原始套接字socket下载http网页文件到txt
python原始套接字socket下载http网页文件到txt
import socket
def _split_http_url(url):
    """Split a plain-HTTP URL into ``(host, port, request_target)``.

    A missing scheme is treated as ``http://``. The fragment is dropped
    because it is never sent to the server; the query string is kept.

    Raises:
        ValueError: if the scheme is not ``http``, the host is missing, or
            the port is not a valid number.
    """
    from urllib.parse import urlsplit

    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url
    parts = urlsplit(url)
    if parts.scheme != "http":
        # A raw TCP socket cannot speak TLS, so fail with a clear message
        # instead of a confusing DNS error.
        raise ValueError(
            f"unsupported URL scheme {parts.scheme!r}: only http is supported"
        )
    if not parts.hostname:
        raise ValueError(f"no host found in URL {url!r}")
    target = parts.path or "/"
    if parts.query:
        target = f"{target}?{parts.query}"
    return parts.hostname, parts.port or 80, target


def _is_chunked(header):
    """Return True if the raw header block declares a chunked body."""
    # Skip the status line; header names are case-insensitive.
    for line in header.split(b"\r\n")[1:]:
        name, _, value = line.partition(b":")
        if name.strip().lower() == b"transfer-encoding":
            if b"chunked" in value.lower():
                return True
    return False


def _decode_chunked(body):
    """Reassemble the payload of a ``Transfer-Encoding: chunked`` body.

    Raises:
        ValueError: if a chunk size is malformed or the body is truncated.
    """
    pieces = []
    pos = 0
    while True:
        line_end = body.find(b"\r\n", pos)
        if line_end == -1:
            raise ValueError("truncated chunked response")
        # Chunk extensions (after ';') carry no payload and are ignored.
        size = int(body[pos:line_end].split(b";", 1)[0].strip(), 16)
        if size < 0:
            raise ValueError("negative chunk size in response")
        if size == 0:
            # Last chunk; any trailer fields after it are not content.
            break
        start = line_end + 2
        end = start + size
        if end > len(body):
            raise ValueError("truncated chunked response")
        pieces.append(body[start:end])
        pos = end + 2  # skip the CRLF that terminates the chunk data
    return b"".join(pieces)


def download_webpage(url, output_file, timeout=10.0):
    """Fetch a page over plain HTTP with a raw socket and save its body.

    The response headers are discarded; only the body is written, in binary
    mode. The body is saved whatever the HTTP status code is, and redirects
    are not followed.

    Args:
        url: Page address, e.g. ``"http://host[:port]/path?query"``. The
            ``http://`` prefix is optional; ``https://`` is rejected.
        output_file: Path of the file the body is written to.
        timeout: Seconds to wait on each socket operation (connect, send,
            receive). ``None`` disables the timeout.

    Returns:
        None. On success a confirmation is printed; on any failure the
        error is printed instead and nothing is raised.
    """
    try:
        host, port, target = _split_http_url(url)
        # The Host header carries the port only when it is not the default.
        host_header = host if port == 80 else f"{host}:{port}"
        request = (
            f"GET {target} HTTP/1.1\r\n"
            f"Host: {host_header}\r\n"
            "Connection: close\r\n"
            "\r\n"
        )

        received = []
        # The context manager closes the socket even if connect/send/recv
        # raises.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
            client_socket.settimeout(timeout)
            client_socket.connect((socket.gethostbyname(host), port))
            client_socket.sendall(request.encode())
            # "Connection: close" makes the server end the stream after the
            # response, so an empty read marks the end of the data.
            while True:
                data = client_socket.recv(4096)
                if not data:
                    break
                received.append(data)
        response = b"".join(received)

        header, separator, content = response.partition(b"\r\n\r\n")
        if not separator:
            raise ValueError("incomplete HTTP response: no header terminator")
        # An HTTP/1.1 server may chunk the body; strip the chunk framing so
        # the saved file holds only the page content.
        if _is_chunked(header):
            content = _decode_chunked(content)

        with open(output_file, "wb") as file:
            file.write(content)
        print(f"网页内容已成功保存到 {output_file}")
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on this function
        # reporting failures on stdout rather than raising.
        print(f"发生错误: {e}")
if __name__ == "__main__":
    # Demo invocation: replace the URL with the page you want to download.
    url, output_file = "http://www.5a8.com", "www5a8com.txt"
    download_webpage(url=url, output_file=output_file)
运行结果
D:\code\python\get>python getsocket.py
网页内容已成功保存到 www5a8com.txt