# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 16:54
@Usage :
@Desc : PyQuery study notes. Reference: https://github.com/Python3WebSpider/PyQueryTest
'''
from pyquery import PyQuery as pq
# Initialise from a string
def stringBase():
    """Parse an HTML string with PyQuery and print the ``li`` selection."""
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    document = pq(html)
    list_items = document('li')
    print(list_items)
# Initialise from a URL
def URLBase():
    """Build a PyQuery document from a URL and print its ``title`` selection."""
    document = pq(url='https://cuiqingcai.com')
    title = document('title')
    print(title)
    # Author's note: the code above is equivalent to the following
    #   doc = pq(requests.get('https://cuiqingcai.com').text)
    #   print(doc('title'))
# Initialise from a file
def fileBase():
    """Build a PyQuery document from ``demo.html`` and print the ``li`` selection."""
    document = pq(filename='demo.html')
    list_items = document('li')
    print(list_items)
# Basic CSS selectors
def cssSelect():
    """Select with a descendant CSS selector, then print the match, its type and each node's text."""
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    document = pq(html)
    matches = document('#container .list li')
    print(matches)
    print(type(matches))

    # Walk the selection node by node and print each node's text.
    for node in matches.items():
        print(node.text())
# Finding child nodes
def child():
    """Demonstrate ``find`` and ``children`` starting from the ``.list`` selection."""

    def _show(selection):
        # Print the selection's type followed by the selection itself.
        print(type(selection))
        print(selection)

    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    document = pq(html)
    list_node = document('.list')
    _show(list_node)

    # find() with the 'li' selector.
    _show(list_node.find('li'))

    # children() without a filter.
    _show(list_node.children())

    # children() filtered by the '.active' selector.
    active_children = list_node.children('.active')
    print(active_children)
def parent():
    """Demonstrate ``parent``, ``parents`` and ``siblings`` on a parsed document.

    Prints, in order: the parent of ``.list`` (type, then value), all of its
    ancestors (type, then value), its ancestors filtered by ``.wrap``, and
    the siblings of ``.list .item-0.active``.
    """
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # The module-level ``pq`` import is used and the document is parsed once:
    # every call below is a traversal, so re-parsing the same string between
    # steps is unnecessary.
    doc = pq(html)
    list_node = doc('.list')

    # Direct parent.
    container = list_node.parent()
    print(type(container))
    print(container)

    # All ancestors.
    ancestors = list_node.parents()
    print(type(ancestors))
    print(ancestors)

    # Ancestors filtered by a selector. Named ``wrap_ancestor`` so the local
    # does not shadow this function's own name.
    wrap_ancestor = list_node.parents('.wrap')
    print(wrap_ancestor)

    # Siblings of the selected item.
    active_item = doc('.list .item-0.active')
    print(active_item.siblings())
def brother():
    """Demonstrate filtered ``siblings``, printing a selection, and ``items``.

    Prints, in order: the ``.active`` siblings of ``.list .item-0.active``,
    the ``.item-0.active`` selection (directly and via ``str``), the type of
    the object returned by ``items()``, and each ``li`` node with its type.
    """
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # The module-level ``pq`` import is used and the document is parsed once:
    # nothing below modifies it, so re-parsing between steps is unnecessary.
    doc = pq(html)

    # Siblings filtered by a selector.
    active_item = doc('.list .item-0.active')
    print(active_item.siblings('.active'))

    # Printing a selection directly and through str().
    selected = doc('.item-0.active')
    print(selected)
    print(str(selected))

    # The selection may hold several nodes; items() lets us visit them one by one.
    nodes = doc('li').items()
    print(type(nodes))
    for node in nodes:
        print(node, type(node))
def attrs():
    """Demonstrate reading attributes from a selection.

    Prints the ``href`` of ``.item-0.active a``, then the ``href`` of the
    ``a`` selection through both ``attr('href')`` and ``attr.href``, and
    finally the ``href`` and text of every ``a`` node.
    """
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # The module-level ``pq`` import is used and the document is parsed once:
    # only attribute reads follow, so re-parsing is unnecessary.
    doc = pq(html)

    active_link = doc('.item-0.active a')
    print(active_link, type(active_link))
    print(active_link.attr('href'))

    links = doc('a')
    print(links, type(links))
    print(links.attr('href'))
    print(links.attr.href)

    for link in links.items():
        # Read the attribute and the text of each node.
        print(link.attr('href'), link.text())
def getHTML():
    """Print the ``html()`` and ``text()`` of the ``li`` selection, then the type of the text."""
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # Uses the module-level ``pq`` import; no function-local re-import needed.
    doc = pq(html)
    li = doc('li')

    # HTML of the first matched node (author's sample output: second item).
    print(li.html())

    # Text of all matched nodes (author's sample output:
    # second item third item fourth item fifth item).
    text = li.text()
    print(text)
    print(type(text))
# Add or remove a class on a node
def operateNode():
    """Print ``.item-0.active`` before and after removing and re-adding the ``active`` class."""
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # Uses the module-level ``pq`` import; no function-local re-import needed.
    doc = pq(html)
    li = doc('.item-0.active')
    print(li)
    li.removeClass('active')
    print(li)
    li.addClass('active')
    print(li)
    # Output recorded by the author (as it appears in the source; any markup
    # is not shown):
    #   third item
    #   third item
    #   third item
def operateNodeInformation():
    """Set an attribute, the text and the HTML of ``.item-0.active``, printing the node after each step."""
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # Uses the module-level ``pq`` import; no function-local re-import needed.
    doc = pq(html)
    li = doc('.item-0.active')
    print(li)
    li.attr('name', 'link')
    print(li)
    li.text('changed item')
    print(li)
    li.html('changed item')
    print(li)
    # Output recorded by the author (as it appears in the source; any markup
    # is not shown):
    #   third item
    #   changed item
    #   changed item
def removeInformation():
    """Print the text of ``.wrap`` before and after removing its ``p`` descendants."""
    # NOTE(review): the fixture literal below holds plain text only, with no
    # element carrying the ``wrap`` class -- the sample markup appears to be
    # missing; TODO restore it before running.
    html = '''
Hello, World
This is a paragraph.
'''
    # Uses the module-level ``pq`` import; no function-local re-import needed.
    doc = pq(html)
    wrap = doc('.wrap')
    print(wrap.text())
    # Output recorded by the author:
    #   Hello, World
    #   This is a paragraph.

    wrap.find('p').remove()
    print(wrap.text())
    # Output recorded by the author:
    #   Hello, World
# Pseudo-class selectors
def fakeCSSSelect():
    """Print the selection produced by each of several pseudo-class ``li`` selectors."""
    # NOTE(review): the fixture literal below holds only a newline; the sample
    # markup appears to be missing -- TODO restore it before running.
    html = '''
'''
    # Uses the module-level ``pq`` import; no function-local re-import needed.
    doc = pq(html)

    # Same selectors, same order as before; the loop replaces six
    # copy-pasted select/print pairs.
    selectors = (
        'li:first-child',
        'li:last-child',
        'li:nth-child(2)',
        'li:gt(2)',
        'li:nth-child(2n)',
        'li:contains(second)',
    )
    for selector in selectors:
        print(doc(selector))
# Script entry point: when executed directly, run the pseudo-class selector demo.
if __name__ == "__main__":
    fakeCSSSelect()