self_example/TensorFlow_eaxmple/Model_train_test/spider/test.py

23 lines
635 B
Python

import requests
from lxml import etree
import scrapy
# Quick experiment: download one chapter page and pull paragraph text out of
# it with XPath.
url = "https://vipreader.qidian.com/chapter/1031940621/742792391/"

# Number of leading paragraphs whose text is collected into textList.
PARAGRAPH_COUNT = 10

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops an HTTP error page from being parsed as content.
response = requests.get(url=url, timeout=10)
response.raise_for_status()

html = etree.HTML(response.text)

# All <p> elements directly under the reading-content container.
total = html.xpath('//div[@class="read-content j_readContent"]/p')
print(total)

# XPath positional predicates are 1-based: p[0] never matches anything, so
# the indexes must run from 1 to PARAGRAPH_COUNT inclusive. Each entry is the
# list of text nodes of one paragraph (an empty list if it does not exist).
textList = [
    html.xpath('//*[@id="j_742792391"]/p[{0}]/text()'.format(i))
    for i in range(1, PARAGRAPH_COUNT + 1)
]
print(textList)