huangxz's notes https://bbs.21ic.com/?73035


Batch-creating playlists with Python

211 views, 2018-4-27 16:20 | Category: Hobbies & Interests

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Python 2 script: build an .m3u8 playlist from an HTTP directory listing.
# Three ways of fetching the page are shown: urllib.urlopen,
# urllib.urlretrieve, and requests.
import urllib
import bs4
import requests

# Directory to index; the first URL is an alternative listing, kept for reference.
#url_abc = 'http://192.168.2.106/music/100首英文歌/'
url_abc = 'http://192.168.2.106/temp/l/'
# way 1: urllib.urlopen -- stream the directory page and print it
def fetch_content1():
    f = urllib.urlopen(url_abc)
    print f.read()
    f.close()

# way 2: urllib.urlretrieve -- save the page to disk, parse it with bs4,
# and write every .mp3 link into an extended M3U playlist
def fetch_content2():
    f1 = open('s2.m3u8', 'w')
    f1.write("#EXTM3U\n")  # extended-M3U header, required because #EXTINF tags follow
    filename = urllib.urlretrieve(url_abc, "a1.html")
    print filename[0], "---", filename[1]
    html = open("a1.html", 'r')
    soup = bs4.BeautifulSoup(html, 'html.parser')
    html.close()
    pageurls = soup.find_all("a", href=True)

    hh_idx = url_abc.index('//')
    hh = url_abc[:hh_idx]  # scheme part, e.g. 'http:'
    for links in pageurls:
        field_name = links.text
        print field_name
        if field_name.endswith(".mp3"):
            f1.write("#EXTINF:-1,")
            f1.write(field_name.encode('utf-8'))
            f1.write('\n')
            # Percent-encode the (possibly non-ASCII) directory path, then
            # append the already-encoded href from the listing.
            s2 = hh + urllib.quote(url_abc[hh_idx:]) + links.get("href")
            f1.write(s2)
            f1.write('\n')
            print s2
    f1.close()
    return pageurls

# way 3: requests -- fetch the page and dump every link's text to a file
def fetch_content3():
    print "start fetching content from %s" % url_abc
    response = requests.get(url_abc)
    print "content fetched"

    soup = bs4.BeautifulSoup(response.content.decode('utf-8'), 'html.parser')
    # The with statement guarantees the file gets closed;
    # the text is written to the file as UTF-8.
    with open('archives.txt', 'w') as f:
        for archive in soup.select("a"):
            print archive
            f.write(archive.get_text().encode('utf-8') + "\n")
    return soup

if __name__ == "__main__":
    s1 = fetch_content2()
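
Running the script produces s2.m3u8. For a listing that contains, say, hello.mp3 (a hypothetical filename used only for illustration), the playlist entries look like this:

#EXTM3U
#EXTINF:-1,hello.mp3
http://192.168.2.106/temp/l/hello.mp3

Each #EXTINF line carries the display title (-1 means the duration is unknown), and the line after it is the absolute, percent-encoded URL of the track.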

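The script above is Python 2. As a minimal sketch, assuming the same directory-listing layout, way 2 translates to Python 3 roughly as follows (fetch_content2_py3 is a name introduced here, not part of the original):

#!/usr/bin/env python3
import urllib.parse
import urllib.request
import bs4

def fetch_content2_py3(url):
    # Fetch and parse the directory listing.
    html = urllib.request.urlopen(url).read()
    soup = bs4.BeautifulSoup(html, 'html.parser')
    scheme, rest = url.split('//', 1)  # e.g. 'http:' and '192.168.2.106/temp/l/'
    with open('s2.m3u8', 'w', encoding='utf-8') as f1:
        f1.write("#EXTM3U\n")
        for link in soup.find_all("a", href=True):
            name = link.text
            if name.endswith(".mp3"):
                f1.write("#EXTINF:-1,%s\n" % name)
                # quote() leaves '/' unescaped by default; the href from the
                # listing is assumed to be percent-encoded already.
                f1.write(scheme + '//' + urllib.parse.quote(rest) + link["href"] + '\n')

if __name__ == "__main__":
    fetch_content2_py3('http://192.168.2.106/temp/l/')

In Python 3 the file is opened with an explicit encoding, so the playlist lines are plain strings and no manual .encode('utf-8') calls are needed.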
