2023-12-15 crawler update: proxy pool related content

This commit is contained in:
kevinding1125 2023-12-15 17:38:04 +08:00
parent b428606ea5
commit 0650cfb369
9 changed files with 187 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 14:30
@Usage :
@Desc :
'''

View File

@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 16:17
@Usage :
@Desc :
'''

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 16:17
@Usage :
@Desc : Log in to login3.scrape.center through its JSON API and reuse the returned JWT
'''
import requests
from urllib.parse import urljoin
BASE_URL = 'https://login3.scrape.center/'
LOGIN_URL = urljoin(BASE_URL, '/api/login')
INDEX_URL = urljoin(BASE_URL, '/api/book')
USERNAME = 'admin'
PASSWORD = 'admin'
# Log in through the JSON API; the JSON response body carries a JWT token.
response_login = requests.post(LOGIN_URL, json={
    'username': USERNAME,
    'password': PASSWORD
})
data = response_login.json()
print('Response JSON', data)
jwt = data.get('token')
print('JWT', jwt)
# The API expects the token in the Authorization header, prefixed with 'jwt'.
headers = {
    'Authorization': f'jwt {jwt}'
}
# Request the book listing with the authenticated headers.
response_index = requests.get(INDEX_URL, params={
    'limit': 18,
    'offset': 0
}, headers=headers)
print('Response Status', response_index.status_code)
print('Response URL', response_index.url)
print('Response Data', response_index.json())
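
A minimal follow-up sketch, assuming the same endpoints and credentials as above: mounting the token on a requests.Session sends it with every later call, with no per-request headers dict.

# Sketch: mount the JWT on a Session so every request carries it automatically.
import requests
from urllib.parse import urljoin

BASE_URL = 'https://login3.scrape.center/'
session = requests.Session()
# Log in and pull the token out of the JSON body, as in the script above.
token = session.post(urljoin(BASE_URL, '/api/login'), json={
    'username': 'admin',
    'password': 'admin'
}).json().get('token')
# Headers set here are merged into every subsequent request on this session.
session.headers.update({'Authorization': f'jwt {token}'})
response = session.get(urljoin(BASE_URL, '/api/book'), params={'limit': 18, 'offset': 0})
print('Response Status', response.status_code)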

View File

@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 15:27
@Usage :
@Desc :
'''

View File

@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 15:37
@Usage :
@Desc : Naive login attempt with bare requests calls; cookies are not shared between them
'''
import requests
from urllib.parse import urljoin
BASE_URL = 'https://login2.scrape.center/'
LOGIN_URL = urljoin(BASE_URL, '/login')
INDEX_URL = urljoin(BASE_URL, '/page/1')
USERNAME = 'admin'
PASSWORD = 'admin'
# Log in with a bare module-level call; requests follows the login redirect
# internally, and any cookies it received stay on this response object.
response_login = requests.post(LOGIN_URL, data={
    'username': USERNAME,
    'password': PASSWORD
})
# This second call shares no cookie jar with the one above, so it goes out
# unauthenticated and the server redirects it back to the login page.
response_index = requests.get(INDEX_URL)
print('Response Status', response_index.status_code)
print('Response URL', response_index.url)
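
The two module-level calls above share no cookie jar, so the cookie set during login never reaches the second request. A minimal check appended to the script above makes the failure visible (the exact redirect target is server-dependent):

# Sketch: detect the bounce by comparing the final URL with the requested one.
if response_index.url != INDEX_URL:
    print('Not logged in; redirected to', response_index.url)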

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 15:43
@Usage :
@Desc : Capture the login cookie from the 302 response (allow_redirects=False) and pass it on manually
'''
import requests
from urllib.parse import urljoin
BASE_URL = 'https://login2.scrape.center/'
LOGIN_URL = urljoin(BASE_URL, '/login')
INDEX_URL = urljoin(BASE_URL, '/page/1')
USERNAME = 'admin'
PASSWORD = 'admin'
# Disable redirects: the session cookie is set on the 302 response itself,
# so following the redirect would leave response_login without it.
response_login = requests.post(LOGIN_URL, data={
    'username': USERNAME,
    'password': PASSWORD
}, allow_redirects=False)
cookies = response_login.cookies
print('Cookies', cookies)
# Pass the captured cookies along explicitly on the follow-up request.
response_index = requests.get(INDEX_URL, cookies=cookies)
print('Response Status', response_index.status_code)
print('Response URL', response_index.url)
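
A related sketch, assuming the POST above really answers with a 302 carrying a Location header: the redirect can also be followed by hand, sending the captured cookies explicitly.

# Sketch: follow the login redirect manually with the captured cookies.
location = response_login.headers.get('Location')
if location:
    response_next = requests.get(urljoin(BASE_URL, location), cookies=cookies)
    print('Followed redirect to', response_next.url)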

View File

@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 15:48
@Usage :
@Desc : Keep the login state automatically with requests.Session
'''
import requests
from urllib.parse import urljoin
BASE_URL = 'https://login2.scrape.center/'
LOGIN_URL = urljoin(BASE_URL, '/login')
INDEX_URL = urljoin(BASE_URL, '/page/1')
USERNAME = 'admin'
PASSWORD = 'admin'
# A Session keeps a single cookie jar across requests, so the cookie set
# during login is sent automatically with every later session call.
session = requests.Session()
response_login = session.post(LOGIN_URL, data={
    'username': USERNAME,
    'password': PASSWORD
})
cookies = session.cookies
print('Cookies', cookies)
response_index = session.get(INDEX_URL)
print('Response Status', response_index.status_code)
print('Response URL', response_index.url)
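
A minimal persistence sketch, assuming a writable cookies.txt next to the script (the filename is an arbitrary choice): backing the session's jar with an LWPCookieJar lets a later run reuse the stored login instead of posting the credentials again.

# Sketch: persist the session cookies so a later run can skip the login step.
import requests
from http.cookiejar import LWPCookieJar

session = requests.Session()
session.cookies = LWPCookieJar('cookies.txt')  # file-backed jar; path is arbitrary
try:
    # Reuse cookies saved by a previous run, if any.
    session.cookies.load(ignore_discard=True)
except FileNotFoundError:
    session.post(LOGIN_URL, data={'username': USERNAME, 'password': PASSWORD})
    session.cookies.save(ignore_discard=True)
response_index = session.get(INDEX_URL)
print('Response URL', response_index.url)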

View File

@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/15 15:53
@Usage :
@Desc : Log in with Selenium, then hand the browser cookies to a requests.Session
'''
from urllib.parse import urljoin
from selenium import webdriver
from selenium.webdriver.common.by import By
import requests
import time
BASE_URL = 'https://login2.scrape.center/'
LOGIN_URL = urljoin(BASE_URL, '/login')
INDEX_URL = urljoin(BASE_URL, '/page/1')
USERNAME = 'admin'
PASSWORD = 'admin'
browser = webdriver.Chrome()
browser.get(BASE_URL)
browser.find_element(By.CSS_SELECTOR, 'input[name="username"]').send_keys(USERNAME)
browser.find_element(By.CSS_SELECTOR, 'input[name="password"]').send_keys(PASSWORD)
browser.find_element(By.CSS_SELECTOR, 'input[type="submit"]').click()
# Crude fixed wait for the post-login navigation; see the explicit-wait sketch below.
time.sleep(10)
# get cookies from selenium
cookies = browser.get_cookies()
print('Cookies', cookies)
browser.close()
# set cookies to requests
session = requests.Session()
for cookie in cookies:
session.cookies.set(cookie['name'], cookie['value'])
response_index = session.get(INDEX_URL)
print('Response Status', response_index.status_code)
print('Response URL', response_index.url)
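
The fixed time.sleep(10) above can be replaced with an explicit wait. A sketch, assuming a successful login navigates the browser to a new URL; click_and_wait is a hypothetical helper, not something from the file above.

# Sketch: click an element and wait until the browser's URL changes,
# instead of sleeping for a fixed number of seconds.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def click_and_wait(browser, element, timeout=10):
    old_url = browser.current_url  # capture the URL before the click
    element.click()
    # Resolves as soon as the URL differs from the pre-click one.
    WebDriverWait(browser, timeout).until(EC.url_changes(old_url))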