我想写一段代码,依次请求多个 URL,然后把抓取到的全部文本保存到一个 txt 文件中,但我不知道应该在哪里加入循环,才不会破坏现有的代码。
下面是目前的代码:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from dhooks import Webhook, Embed
def getReadMe(path='urls.txt'):
    """Return the full text of the URL list file.

    Args:
        path: File to read. Defaults to ``'urls.txt'`` (relative to the
            current working directory), the name that was previously
            hard-coded.

    Returns:
        The file's contents as a single string, unmodified (line breaks
        included).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()
def getHtml(readMe):
    """Download a single page and return its raw body.

    Args:
        readMe: The URL to fetch, as a string. Surrounding whitespace
            (for example the trailing newline left over from reading a
            line of a text file) is stripped before the request is made.

    Returns:
        The response body (``response.content``).

    Raises:
        requests.RequestException: On connection problems, on timeout
            (3 seconds), or, via ``raise_for_status()``, when the server
            answers with an HTTP error status.
    """
    ua = UserAgent()
    # Send a randomly chosen User-Agent string with each request.
    header = {'user-agent': ua.random}
    # Strip first: stray whitespace or a newline would otherwise become
    # part of the requested URL.
    response = requests.get(readMe.strip(), headers=header, timeout=3)
    response.raise_for_status()
    return response.content
# Script body: fetch every URL listed in the URL file, extract the visible
# text of each page, and save all of it to copy.txt.
readMe = getReadMe()
print(readMe)

# Names of parent tags whose text nodes are skipped when collecting text.
blacklist = [
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    'style',
]

# The URL file holds one URL per line; ignore blank lines and stray
# whitespace so each request gets exactly one clean URL.
urls = [line.strip() for line in readMe.splitlines() if line.strip()]

# This loop is the only place that has to repeat per URL: download, parse,
# filter. The file is written once, after the loop, so no page overwrites
# an earlier one.
pages = []
for url in urls:
    html = getHtml(url)
    soup = BeautifulSoup(html, 'html.parser')
    # `string=True` selects every text node (the current spelling of the
    # deprecated `text=True`).
    text = soup.find_all(string=True)
    pages.append(
        ''.join('{} '.format(t) for t in text if t.parent.name not in blacklist)
    )

# Pages are separated by a newline; with a single URL the output is the
# same as before.
output = '\n'.join(pages)
print(output)

# Scraped pages may contain any Unicode character, so write as UTF-8 rather
# than relying on the platform's default encoding.
with open("copy.txt", "w", encoding="utf-8") as file:
    file.write(output)