用 Python 爬取 AcFun 视频（m3u8 分段下载与合并） / Crawling AcFun videos with Python (downloading and merging m3u8 segments)
import os
import re

import requests
from lxml import etree


class CrawlVideo:
    def __init__(self, start_url=None, headers=None, base_url=None, first_path=None):
        if start_url is None:
            self.start_url = 'https://www.acfun.cn/v/ac20783544'
        else:
            self.start_url = start_url
        if headers is None:
            self.headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                              "AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/121.0.0.0 Safari/537.36"
            }
        else:
            self.headers = headers
        if base_url is None:
            self.base_url = 'https://tx-safety-video.acfun.cn/mediacloud/acfun/acfun_video/'
        else:
            self.base_url = base_url
        if first_path is None:
            self.first_path = 'C:/Users/admin/Desktop/'
        else:
            self.first_path = first_path
        self.video_name = ""
        self.m3u8_name = ""

    def get_html_data(self, url=None, head=None):
        """Fetch the page source of a video detail page.

        :param url: URL of a video detail page
        :param head: request headers
        :return: the HTML source as text
        """
        if head is None:
            head = self.headers
        if url is None:
            url = self.start_url
        response = requests.get(url=url, headers=head)
        html_data_text = response.text
        self.m3u8_name = response.url.split('/')[-1]
        # Cache the page source on disk so repeated runs can reuse it.
        if not os.path.exists(f'{self.first_path}{self.m3u8_name}.txt'):
            with open(f'{self.first_path}{self.m3u8_name}.txt', 'w', encoding='utf-8') as f:
                f.write(html_data_text)
        with open(f'{self.first_path}{self.m3u8_name}.txt', 'r', encoding='utf-8') as f:
            html_data_text = f.read()
        return html_data_text

    def get_fragment_url_list(self, data):
        """Build the full URL of every video segment.

        :param data: HTML source of a video detail page
        :return: a list containing the URLs of all video segments
        """
        tree = etree.HTML(data)
        self.video_name = tree.xpath("//h1[@class='title']/span/text()")[0]
        print(self.video_name)
        # The player config is embedded as an escaped JSON string: cut out the
        # ksPlayJsonHevc blob, strip the backslash escapes, then pull the m3u8 URL.
        m3u8_str = re.findall(r'"ksPlayJsonHevc":"(.*?)window.videoResource', data, re.S)[0].strip()[:-1]
        m3u8_uri_list = re.findall('"url":"(https://.*?m3u8.*?)",', m3u8_str.replace('\\', ''))
        m3u8 = requests.get(url=m3u8_uri_list[0], headers=self.headers).text
        with open(f'{self.first_path}{self.m3u8_name}_list.txt', 'w', encoding='utf-8') as f:
            f.write(m3u8)
        with open(f'{self.first_path}{self.m3u8_name}_list.txt', 'r', encoding='utf-8') as f:
            m3u8_str_list = f.readlines()
        # Every non-comment line of the playlist is a segment file name.
        fgt_url_list = [self.base_url + line.strip() for line in m3u8_str_list if not line.startswith("#")]
        print(fgt_url_list)
        return fgt_url_list

    def append_fragment(self, fil, lst, v_typ=''):
        """Download each segment in turn and append it to one file
        (v_typ covers an alternate layout of the segment URLs).

        :param v_typ: extra path component inserted after base_url
        :param fil: file object the video is written into
        :param lst: list of segment URLs
        :return: None
        """
        for u in lst:
            u = u.replace(self.base_url, (self.base_url + v_typ))
            print(u)
            res = requests.get(url=u, headers=self.headers).content
            fil.write(res)

    def save_video(self, fra_url_list):
        """Save all video segments into a single file.

        :param fra_url_list: list of segment URLs
        :return: None
        """
        if not os.path.exists(f'{self.first_path}{self.video_name}.mp4'):
            file = open(f'{self.first_path}{self.video_name}.mp4', 'ab')
            self.append_fragment(file, fra_url_list)
            # Flush before checking the size, otherwise buffered writes make
            # getsize() report 0; if nothing was written, retry with 'hls/'.
            file.flush()
            if os.path.getsize(f'{self.first_path}{self.video_name}.mp4') == 0:
                self.append_fragment(file, fra_url_list, v_typ='hls/')
            file.flush()
            file.close()
            print("Download finished")
        # Clean up the cached page source and playlist files.
        if os.path.exists(f"{self.first_path}{self.m3u8_name}.txt"):
            os.remove(f"{self.first_path}{self.m3u8_name}.txt")
        if os.path.exists(f"{self.first_path}{self.m3u8_name}_list.txt"):
            os.remove(f"{self.first_path}{self.m3u8_name}_list.txt")


if __name__ == '__main__':
    crawl = CrawlVideo()
    html_data = crawl.get_html_data()
    fragment_url_list = crawl.get_fragment_url_list(html_data)
    crawl.save_video(fragment_url_list)
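To point the script at a different video or download directory, pass the constructor arguments instead of relying on the defaults. A minimal usage sketch; the URL below is just the script's built-in default and the directory is a placeholder, not a value from the original post:

# Usage sketch with explicit parameters (placeholder values).
crawl = CrawlVideo(
    start_url='https://www.acfun.cn/v/ac20783544',  # any AcFun video detail page
    first_path='D:/videos/',                        # download directory, must end with '/'
)
html = crawl.get_html_data()
segments = crawl.get_fragment_url_list(html)
crawl.save_video(segments)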
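The subtlest step above is the playlist extraction: the player config is embedded in the page as an escaped JSON string, so the quote characters around "url" are preceded by backslashes and the regex cannot match until those escapes are stripped. A toy demonstration with a fabricated input shaped like that escaped config (not real AcFun data):

import re

# Fabricated blob imitating the escaped player config.
blob = '{\\"url\\":\\"https://example.com/playlist.m3u8\\",\\"quality\\":\\"720p\\"}'
pattern = '"url":"(https://.*?m3u8.*?)",'
print(re.findall(pattern, blob))                    # [] - the \" escapes block the match
print(re.findall(pattern, blob.replace('\\', '')))  # ['https://example.com/playlist.m3u8']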
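One caveat: save_video byte-concatenates MPEG-TS segments into a file named .mp4. Many players tolerate that, but some do not, because the container is still TS. An optional post-processing sketch, assuming ffmpeg is installed and on PATH (this is not part of the original script):

import subprocess

def remux_to_mp4(src_path, dst_path):
    """Rewrap a concatenated TS stream into a real MP4 container without re-encoding."""
    # -c copy copies the audio/video streams as-is; only the container changes.
    subprocess.run(['ffmpeg', '-y', '-i', src_path, '-c', 'copy', dst_path], check=True)

# remux_to_mp4('C:/Users/admin/Desktop/video.mp4', 'C:/Users/admin/Desktop/video_fixed.mp4')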