# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 16:08
@Usage :
@Desc : Reference: https://github.com/Python3WebSpider/BeautifulSoupTest
'''
import re

from bs4 import BeautifulSoup

# NOTE(review): the markup of every HTML fixture in this file was reconstructed.
# The reviewed copy only contained the text nodes (tags had been stripped), which
# made selectors such as ``soup.title`` / ``soup.a`` / ``ul`` match nothing.
# Tag names, classes, ids and attributes below follow the selectors and inline
# comments used by the code; confirm them against the upstream example
# referenced in the module docstring before relying on exact output.
html = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""


def baseUse() -> None:
    """Show basic node access: tag lookup, node name, attributes, nested selection."""
    soup = BeautifulSoup(html, 'lxml')
    print(soup.title)  # <title>The Dormouse's story</title>
    print(type(soup.title))  # <class 'bs4.element.Tag'>
    print(soup.title.string)  # The Dormouse's story
    print(soup.head)  # <head><title>The Dormouse's story</title></head>
    print(soup.p)  # <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
    print(soup.p.name)  # node name: p
    print(soup.p.attrs)  # all attributes: {'class': ['title'], 'name': 'dromouse'}
    print(soup.p.attrs['name'])  # attribute value: dromouse
    print(soup.p['name'])  # attribute value (shortcut): dromouse
    print(soup.body.p['name'])  # nested selection: dromouse
    print("==========================")


def child() -> None:
    """Print the direct children, then all descendants, of the first <p>."""
    # NOTE(review): reconstructed markup; the <span> nesting is what makes the
    # descendants listing differ from the children listing.
    html = """
<html>
    <head>
        <title>The Dormouse's story</title>
    </head>
    <body>
        <p class="story">
            Once upon a time there were three little sisters; and their names were
            <a href="http://example.com/elsie" class="sister" id="link1">
                <span>Elsie</span>
            </a>
            <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
            and
            <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
            and they lived at the bottom of a well.
        </p>
        <p class="story">...</p>
"""
    soup = BeautifulSoup(html, 'lxml')
    # Direct children only.
    for i, node in enumerate(soup.p.children):
        print(i, node)
    print("===============================")
    # Children, grandchildren, and so on.
    for i, node in enumerate(soup.p.descendants):
        print(i, node)
    print("===============================")


def parent() -> None:
    """Print the parent and the chain of ancestors of the first <a>.

    Uses the module-level ``html`` fixture.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Immediate parent.
    print(soup.a.parent)
    print("===============================")
    # All ancestors; ``parents`` is lazy, so materialize it for printing.
    print(type(soup.a.parents))
    print(list(enumerate(soup.a.parents)))
    print("=============================")


def brother() -> None:
    """Print the next/previous sibling(s) of the first <a>."""
    # NOTE(review): reconstructed markup (see the note at the top of the file).
    html = """
<html>
    <body>
        <p class="story">
            Once upon a time there were three little sisters; and their names were
            <a href="http://example.com/elsie" class="sister" id="link1">
                <span>Elsie</span>
            </a>
            Hello
            <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
            and
            <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
            and they lived at the bottom of a well.
        </p>
"""
    # Sibling nodes.
    soup = BeautifulSoup(html, 'lxml')
    print('Next Sibling', soup.a.next_sibling)
    print('Prev Sibling', soup.a.previous_sibling)
    print('Next Siblings', list(enumerate(soup.a.next_siblings)))
    print('Prev Siblings', list(enumerate(soup.a.previous_siblings)))


# Find every node that satisfies the condition.
def findAll() -> None:
    """Demonstrate ``find_all`` by tag name, including nested queries on results."""
    # NOTE(review): reconstructed markup (see the note at the top of the file).
    html = '''
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
'''
    soup = BeautifulSoup(html, 'lxml')
    print(soup.find_all(name='ul'))
    print(type(soup.find_all(name='ul')[0]))
    # Each result is itself searchable.
    for ul in soup.find_all(name='ul'):
        print(ul.find_all(name='li'))
    for ul in soup.find_all(name='ul'):
        print(ul.find_all(name='li'))
        for li in ul.find_all(name='li'):
            print(li.string)


# Find nodes whose attributes match.
def attrs() -> None:
    """Demonstrate ``find_all`` filtered by attributes and by text content."""
    # NOTE(review): reconstructed markup (see the note at the top of the file).
    html = '''
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1" name="elements">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
'''
    soup = BeautifulSoup(html, 'lxml')
    print(soup.find_all(attrs={'id': 'list-1'}))
    print(soup.find_all(attrs={'name': 'elements'}))
    # Common attributes can be passed as keywords instead of through ``attrs``;
    # ``class_`` carries a trailing underscore because ``class`` is a keyword.
    print(soup.find_all(id='list-1'))
    print(soup.find_all(class_='element'))
    # ``string`` is equivalent to ``text``: it matches against the text content.
    print(soup.find_all(string=re.compile('Foo')))


# Return only the first matching element.
def find() -> None:
    """Demonstrate ``find``, which returns the first match instead of a list."""
    # NOTE(review): reconstructed markup (see the note at the top of the file).
    html = '''
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
'''
    soup = BeautifulSoup(html, 'lxml')
    print(soup.find(name='ul'))
    print(type(soup.find(name='ul')))
    print(soup.find(class_='list'))


# CSS selectors.
def cssSelect() -> None:
    """Demonstrate ``select``: CSS queries, nested selection, attributes, text."""
    # NOTE(review): reconstructed markup (see the note at the top of the file).
    html = '''
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    </div>
    <div class="panel-body">
        <ul class="list" id="list-1">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
'''
    # The document is never modified, so one parse serves every section below.
    soup = BeautifulSoup(html, 'lxml')
    print(soup.select('.panel .panel-heading'))
    print(soup.select('ul li'))
    print(soup.select('#list-2 .element'))
    print(type(soup.select('ul')[0]))

    # Nested selection.
    for ul in soup.select('ul'):
        print(ul.select('li'))

    # Reading attributes.
    for ul in soup.select('ul'):
        print(ul['id'])
        print(ul.attrs['id'])

    # Reading text.
    for li in soup.select('li'):
        print('Get Text:', li.get_text())
        print('String:', li.string)


if __name__ == '__main__':
    cssSelect()