This commit is contained in:
commit
8d530054ae
|
|
@ -0,0 +1,20 @@
|
|||
package com.atguigu.spark.core.rdd.operator.transform
|
||||
|
||||
import org.apache.spark.api.java.function.FilterFunction
|
||||
|
||||
/**
 * Demo Spark `FilterFunction[Int]` that keeps every element.
 *
 * Each call prints the value captured at construction time.
 *
 * Created 2023-07-19 18:44:00.
 *
 * @param bbb value stored in `cc` and printed on every call
 * @author dingjiawen@xiaomi.com
 * @version 1.0.0
 */
class AAA(bbb: Int) extends FilterFunction[Int] {

  /** Copy of the constructor argument; printed by `call`. */
  val cc = bbb

  /**
   * Prints `cc` and accepts the element.
   *
   * @param t element under test (not inspected)
   * @return always `true`
   */
  override def call(t: Int): Boolean = {
    println(cc)
    true
  }
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
package com.atguigu.spark.core.rdd.operator.transform
|
||||
|
||||
/**
 * Serializable `Int => Boolean` predicate that keeps every element.
 *
 * Each application prints the value captured at construction time.
 *
 * Created 2023-07-19 18:44:00.
 *
 * @param bbb value stored in `cc` and printed on every application
 * @author dingjiawen@xiaomi.com
 * @version 1.0.0
 */
class CCC(bbb: Int) extends (Int => Boolean) with Serializable {

  /** Copy of the constructor argument; printed by `apply`. */
  val cc = bbb

  /**
   * Prints `cc` and accepts the element.
   *
   * @param t element under test (not inspected)
   * @return always `true`
   */
  override def apply(t: Int): Boolean = {
    println(cc)
    true
  }
}
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
package com.atguigu.spark.core.rdd.operator.transform
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import net.jpountz.xxhash.{XXHash64, XXHashFactory}
|
||||
import org.apache.log4j.Logger
|
||||
import org.apache.spark.api.java.function.FilterFunction
|
||||
|
||||
import java.nio.ByteBuffer
|
||||
|
||||
/**
 * Deterministic message sampler: keeps a share of the incoming JSON messages and drops
 * the rest.
 *
 * Created by likunyi@xiaomi.com at 2022-04-29 15:32:00.
 *
 * The keep/drop decision is derived from a hash of the message's `distinct_id` field, so
 * two messages with the same `distinct_id` always receive the same decision.
 *
 * `samplingRate` is the sampling rate as an integer percentage:
 *  - `samplingRate = 10` means a 10% sampling rate
 *  - `samplingRate = 64` means a 64% sampling rate
 *  - `samplingRate = 100` disables sampling: every message is kept and not even parsed
 *
 * @param samplingRate sampling rate in percent (integer)
 */
class HdfsToHolo_MessageSampler1(samplingRate: Int) extends FilterFunction[String] {

  // These helpers stay `@transient` (they are not written out with the function), and
  // they are `lazy` so that every deserialized copy rebuilds them on first use. A plain
  // `@transient val` is only initialised by the constructor; Java deserialization leaves
  // transient fields at their default (null), which made `call` fail with a
  // NullPointerException once the function had been shipped to another JVM.
  @transient private lazy val objectMapper: ObjectMapper = new ObjectMapper()
  @transient private lazy val hasher: XXHash64 = XXHashFactory.fastestInstance().hash64()
  @transient private lazy val logger: Logger = Logger.getLogger(this.getClass.getName)

  // Reduce samplingRate/100 to lowest terms (the initialisation that used to live in the
  // commented-out `open()` method). The divisor was previously hard-coded to 100, which
  // turned every rate below 100 into NUMERATOR = 0 / DENOMINATOR = 1 and therefore dropped
  // every message. gcd(x, 100) is always >= 1, so the divisions below cannot fail.
  private val greatestCommonDivisor: Int = BigInt(samplingRate).gcd(BigInt(100)).toInt
  private val NUMERATOR: Int = samplingRate / greatestCommonDivisor
  private val DENOMINATOR: Int = 100 / greatestCommonDivisor

  // Seed for xxHash64. The literal is deliberately left exactly as written: changing its
  // value (for example by adding an `L` suffix) would change every hash and therefore
  // which distinct ids are sampled.
  private val XXHASH_SEED: Long = 0x9747b28c

  /**
   * Decides whether `input` is kept.
   *
   * @param input one message as a JSON string; unless `samplingRate` is 100 it must be
   *              valid JSON containing a `distinct_id` field
   * @return `true` to keep the message, `false` to drop it
   */
  override def call(input: String): Boolean = {
    // NOTE(review): this logs the configuration and the full payload at INFO level for
    // every single record; consider lowering it to DEBUG once the job is verified.
    logger.info(s"所有参数:【objectMapper:${objectMapper}】,【NUMERATOR:${NUMERATOR}】,【DENOMINATOR:${DENOMINATOR}】,【hasher:${hasher}】")
    logger.info(s"${input}")

    if (samplingRate == 100) {
      // No sampling (rate is 100%): pass the record downstream untouched.
      true
    } else {
      val currentMessage = objectMapper.readTree(input)
      // NOTE(review): `get` returns null when `distinct_id` is absent, so such a message
      // fails here with a NullPointerException - confirm that this is the intended policy.
      //
      // Sampling proper. Roughly: for a 64% rate the reduced fraction is 16/25, so the
      // hash is taken modulo 25 and remainders 0..15 are kept while 16..24 are dropped.
      hash(currentMessage.get("distinct_id").asText()) % DENOMINATOR < NUMERATOR
    }
  }

  /**
   * Non-negative xxHash64 of `distinct_id` (UTF-8 bytes, seeded with `XXHASH_SEED`).
   *
   * @param distinct_id identifier to hash
   * @return absolute value of the 64-bit hash, always `>= 0`
   */
  private def hash(distinct_id: String): Long = {
    val raw = hasher.hash(ByteBuffer.wrap(distinct_id.getBytes("UTF-8")), XXHASH_SEED)
    // Math.abs(Long.MinValue) is still Long.MinValue (negative); map it to 0 so the
    // result is always non-negative. The sampling decision for that value is unchanged.
    if (raw == Long.MinValue) 0L else Math.abs(raw)
  } // hash

}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package com.atguigu.spark.core.rdd.operator.transform
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
|
||||
object Spark01_RDD_Operator_Transform {
|
||||
|
|
@ -11,7 +12,7 @@ object Spark01_RDD_Operator_Transform {
|
|||
val sc =new SparkContext(sparkConf)
|
||||
|
||||
//TODO 算子 - map
|
||||
val rdd = sc.makeRDD(
|
||||
val rdd: RDD[Int] = sc.makeRDD(
|
||||
List(1,2,3,4)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,5 @@
|
|||
package com.atguigu.spark.core.rdd.operator.transform
|
||||
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.Date
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.{SparkConf, SparkContext}
|
||||
|
||||
/**
|
||||
|
|
@ -11,21 +7,25 @@ import org.apache.spark.{SparkConf, SparkContext}
|
|||
*/
|
||||
object Spark08_RDD_Operator_Transform {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
def main(args: Array[String]): Unit = {
|
||||
|
||||
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
|
||||
//创建上下文环境对象
|
||||
val sc =new SparkContext(sparkConf)
|
||||
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
|
||||
//创建上下文环境对象
|
||||
val sc = new SparkContext(sparkConf)
|
||||
|
||||
//TODO 算子 - filter
|
||||
val rdd = sc.makeRDD(
|
||||
List(1,2,3,4)
|
||||
)
|
||||
//TODO 算子 - filter
|
||||
val rdd = sc.makeRDD(
|
||||
List(1, 2, 3, 4)
|
||||
)
|
||||
|
||||
val filterRDD = rdd.filter(_ % 2 == 0)
|
||||
val filterRDD = rdd.filter(
|
||||
new CCC(10)
|
||||
)
|
||||
|
||||
filterRDD.collect().foreach(println(_))
|
||||
// val filterRDD = rdd.filter(_ % 2 == 0)
|
||||
|
||||
sc.stop()
|
||||
}
|
||||
filterRDD.collect().foreach(println(_))
|
||||
|
||||
sc.stop()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package com.atguigu.spark.streaming
|
|||
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.streaming.dstream.ReceiverInputDStream
|
||||
import org.apache.spark.streaming.dstream.{InputDStream, ReceiverInputDStream}
|
||||
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
||||
|
||||
import scala.collection.mutable
|
||||
|
|
@ -27,7 +27,7 @@ object SparkStreaming02_Queue {
|
|||
val rddQueue = new mutable.Queue[RDD[Int]]()
|
||||
|
||||
//4.创建QueueInputDStream
|
||||
val inputStream = ssc.queueStream(rddQueue,oneAtATime = false)
|
||||
val inputStream: InputDStream[Int] = ssc.queueStream(rddQueue,oneAtATime = false)
|
||||
|
||||
|
||||
//5.处理队列中的RDD数据
|
||||
|
|
|
|||
|
|
@ -29,10 +29,10 @@ object SparkStreaming06_state_transform {
|
|||
//和直接map的区别:写code的位置
|
||||
//Driver端
|
||||
val newDS: DStream[String] = lines.transform(
|
||||
rdd =>{
|
||||
rdd => {
|
||||
//code:Driver端,(周期性执行)
|
||||
rdd.map(
|
||||
str =>{
|
||||
str => {
|
||||
//code:Executor端
|
||||
str
|
||||
}
|
||||
|
|
@ -42,7 +42,7 @@ object SparkStreaming06_state_transform {
|
|||
|
||||
//code:Driver端
|
||||
val newDS1: DStream[String] = lines.map(
|
||||
str =>{
|
||||
str => {
|
||||
//code:executor端,无周期性执行
|
||||
str
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
package com.atguigu.gmall.realtime.app.dwm
|
||||
|
||||
import scala.collection.mutable
|
||||
|
||||
/**
 * Converts whitespace-separated infix arithmetic expressions into Reverse Polish Notation
 * (RPN) and evaluates RPN token sequences with integer arithmetic.
 *
 * Supported operators are `+`, `-`, `*` and `/` (all left-associative) plus parentheses.
 *
 * Created 2023-06-30 17:27:00.
 *
 * @author dingjiawen@xiaomi.com
 * @version 1.0.0
 */
object GenerateRPN {

  /** Binding strength of each supported binary operator; a larger value binds tighter. */
  private val Precedence: Map[String, Int] = Map("+" -> 1, "-" -> 1, "*" -> 2, "/" -> 2)

  /** Demo entry point: converts one sample expression, prints its RPN form and its value. */
  def main(args: Array[String]): Unit = {
    // Sample expressions and their RPN form. Tokens must be separated by whitespace.
    //   "1 + 2 * 3 - 4 * 5 - 6 + 7 * 8 - 9"      -> 1 2 3 * + 4 5 * - 6 - 7 8 * + 9 -
    //   "1 + 2 * 3 - 4 * 5 - ( 6 + 7 * 8 - 9 )"  -> 1 2 3 * + 4 5 * - 6 7 8 * + 9 - -
    //   "5 + 2 * 3"                              -> 5 2 3 * +
    val str = "6 * ( 5 + ( 2 + 3 ) * 8 + 3 )" // -> 6 5 2 3 + 8 * + 3 + *
    val RPNStack: Array[String] = generateRPN(str)
    println(RPNStack.mkString(","))
    println(evalRPN(RPNStack))
  }

  /**
   * Converts an infix expression to RPN with the shunting-yard algorithm.
   *
   * Tokens are obtained by splitting on whitespace, so operators, parentheses and
   * operands must be separated by at least one blank (this is what allows an operand
   * such as `-5` to be written as a single token). Blank tokens produced by leading or
   * trailing whitespace are ignored. Anything that is neither an operator nor a
   * parenthesis is passed through unchanged as an operand.
   *
   * @param expression whitespace-separated infix expression
   * @return the tokens of `expression` in RPN order
   * @throws IllegalArgumentException if the parentheses in `expression` are unbalanced
   */
  def generateRPN(expression: String): Array[String] = {
    val output = mutable.ArrayBuffer.empty[String]
    val operators = mutable.Stack[String]()

    // Move operators from the stack to the output for as long as `keepPopping` holds.
    def drainWhile(keepPopping: String => Boolean): Unit = {
      while (operators.nonEmpty && keepPopping(operators.top)) {
        output += operators.pop()
      }
    }

    // Leading whitespace makes `split` return an empty first token; drop such tokens so
    // they are not emitted as operands.
    val tokens = expression.split("\\s+").filter(_.nonEmpty)

    tokens.foreach {
      case "(" =>
        operators.push("(")
      case ")" =>
        drainWhile(_ != "(")
        if (operators.isEmpty) {
          throw new IllegalArgumentException(s"Unmatched ')' in expression: $expression")
        }
        operators.pop() // discard the matching "("
      case op if isOperator(op) =>
        // `>=` (rather than `>`) makes operators of equal precedence left-associative.
        drainWhile(top => isOperator(top) && Precedence(top) >= Precedence(op))
        operators.push(op)
      case operand =>
        output += operand
    }

    while (operators.nonEmpty) {
      val pending = operators.pop()
      if (pending == "(") {
        // Previously the stray "(" was silently emitted as part of the RPN output.
        throw new IllegalArgumentException(s"Unmatched '(' in expression: $expression")
      }
      output += pending
    }

    output.toArray
  }

  /**
   * Evaluates an RPN token sequence using `Int` arithmetic (`/` is integer division).
   *
   * @param tokens operands (decimal integers) and operators in RPN order
   * @return the value left on top of the evaluation stack after all tokens are consumed
   * @throws NumberFormatException if a non-operator token is not a valid `Int`
   * @throws ArithmeticException   on division by zero
   * @throws NoSuchElementException if an operator lacks operands or `tokens` is empty
   *                                (raised by popping the empty evaluation stack)
   */
  def evalRPN(tokens: Array[String]): Int = {
    val operands = mutable.Stack[Int]()

    tokens.foreach { token =>
      if (isOperator(token)) {
        // The right-hand operand was pushed last, so it is popped first.
        val right = operands.pop()
        val left = operands.pop()
        operands.push(performOperation(token, left, right))
      } else {
        operands.push(token.toInt)
      }
    }

    operands.pop()
  }

  /**
   * Tells whether `token` is one of the supported binary operators.
   *
   * @param token token to classify
   * @return `true` for `+`, `-`, `*` and `/`, `false` for everything else
   */
  def isOperator(token: String): Boolean = Precedence.contains(token)

  /**
   * Applies a binary operator to two operands.
   *
   * @param operator one of `+`, `-`, `*`, `/`
   * @param operand1 left-hand operand
   * @param operand2 right-hand operand
   * @return `operand1 operator operand2`
   * @throws ArithmeticException  on division by zero
   * @throws scala.MatchError     if `operator` is not a supported operator
   */
  def performOperation(operator: String, operand1: Int, operand2: Int): Int = {
    operator match {
      case "+" => operand1 + operand2
      case "-" => operand1 - operand2
      case "*" => operand1 * operand2
      case "/" => operand1 / operand2
    }
  }

}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
package com.atguigu.scala
|
||||
|
||||
/**
 * Wrapper that forwards `open()` to a `BBB` and then announces its own opening.
 *
 * @param bbb delegate that is opened first
 */
class AAA(bbb: BBB) {

  /** Calls `bbb.open()`, then prints `aaaOpen`. */
  def open(): Unit = {
    bbb.open()
    println("aaaOpen")
  }

}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
package com.atguigu.scala
|
||||
|
||||
/** Small demo class with a no-argument primary constructor and a printing auxiliary one. */
class BBB {

  /**
   * Auxiliary constructor: builds a default instance, then prints `a` and `b`, one per
   * line.
   *
   * Written with `=` because procedure syntax (`def this(...) { ... }`) is deprecated.
   *
   * @param a first value to print
   * @param b second value to print
   */
  def this(a: Int, b: Int) = {
    this()
    println(a)
    println(b)
  }

  /** Prints `open`. */
  def open(): Unit = {
    println("open")
  }

}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
package com.atguigu.scala.chapter03
|
||||
|
||||
import com.atguigu.scala.test.User
|
||||
import com.atguigu.scala.test1.User
|
||||
|
||||
object Scala02_Oper {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package com.atguigu.scala.chapter06
|
||||
|
||||
import com.atguigu.scala.test.ScalaUser
|
||||
import com.atguigu.scala.test1.ScalaUser
|
||||
|
||||
object Scala09_Object_Instance_4 {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package com.atguigu.scala.chapter06
|
||||
|
||||
import com.atguigu.scala.test.ScalaUser
|
||||
import com.atguigu.scala.test1.ScalaUser
|
||||
|
||||
object Scala09_Object_Instance_5 {
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ object Scala05_Transform extends Parent with MyTrait {
|
|||
//TODO 3.特征或伴生对象
|
||||
//TODO 4.其他地方声明(包对象)
|
||||
//TODO 5.直接导入
|
||||
import com.atguigu.scala.test.TestTransform._
|
||||
import com.atguigu.scala.test1.TestTransform._
|
||||
val user=new User()
|
||||
user.insertUser()
|
||||
user.updateUser()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
package com.atguigu.scala
|
||||
|
||||
/** Demo entry point: wraps a `BBB` in an `AAA` and opens it. */
object test111 {

  def main(args: Array[String]): Unit = {
    // Building the BBB first prints 1 and 2; opening the wrapper then prints
    // "open" followed by "aaaOpen".
    val inner = new BBB(1, 2)
    val outer = new AAA(inner)
    outer.open()
  }

}
|
||||
|
|
@ -185,6 +185,7 @@ public class DataServiceImplTest {
|
|||
assertEquals(tagName1, result.get(0).getTagName());
|
||||
assertEquals(tagName2, result.get(1).getTagName());
|
||||
|
||||
|
||||
// if (result.get(0).getTagName().equals(tagName1) && result.get(1).getTagName().equals(tagName2)) {
|
||||
// DataSample[] datasamples1 = result.get(0).getSamples();
|
||||
// DataSample[] datasamples2 = result.get(1).getSamples();
|
||||
|
|
|
|||
|
|
@ -1,264 +0,0 @@
|
|||
#
|
||||
# A fatal error has been detected by the Java Runtime Environment:
|
||||
#
|
||||
# EXCEPTION_ACCESS_VIOLATION (0xc0000005) at pc=0x000000006abea148, pid=12532, tid=0x0000000000004e18
|
||||
#
|
||||
# JRE version: Java(TM) SE Runtime Environment (8.0_311-b11) (build 1.8.0_311-b11)
|
||||
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.311-b11 mixed mode windows-amd64 compressed oops)
|
||||
# Problematic frame:
|
||||
# V [jvm.dll+0x19a148]
|
||||
#
|
||||
# Failed to write core dump. Minidumps are not enabled by default on client versions of Windows
|
||||
#
|
||||
# If you would like to submit a bug report, please visit:
|
||||
# http://bugreport.java.com/bugreport/crash.jsp
|
||||
#
|
||||
|
||||
--------------- T H R E A D ---------------
|
||||
|
||||
Current thread (0x000002d51ddc9000): JavaThread "JDWP Transport Listener: dt_socket" daemon [_thread_in_vm, id=19992, stack(0x000000ec02500000,0x000000ec02600000)]
|
||||
|
||||
siginfo: ExceptionCode=0xc0000005, reading address 0x000002d51d8e2018
|
||||
|
||||
Registers:
|
||||
RAX=0x000002d51d7786d0, RBX=0x0000000000000003, RCX=0x000002d51d8e2008, RDX=0x000002d51d779200
|
||||
RSP=0x000000ec025ff7b0, RBP=0x000000ec025ff829, RSI=0x00000000000000b6, RDI=0x000002d51d779268
|
||||
R8 =0x000002d51d779aa8, R9 =0x00007ffedf820000, R10=0x000002d51d779211, R11=0x000002d51fea87d9
|
||||
R12=0x000002d51fea87d8, R13=0x000000ec025ff8b0, R14=0x000000000000005b, R15=0x00000000000000b6
|
||||
RIP=0x000000006abea148, EFLAGS=0x0000000000010202
|
||||
|
||||
Top of Stack: (sp=0x000000ec025ff7b0)
|
||||
0x000000ec025ff7b0: 000002d51ddc9000 00000000000000b6
|
||||
0x000000ec025ff7c0: 0000000000000003 000002d51ddc9000
|
||||
0x000000ec025ff7d0: 000002d51d779268 000002d51d779268
|
||||
0x000000ec025ff7e0: 000002d51d779268 000002d51d779268
|
||||
0x000000ec025ff7f0: 000002d51ddc9000 000002d51d779268
|
||||
0x000000ec025ff800: 000002d51ddc9000 000002d51d779268
|
||||
0x000000ec025ff810: 000002d51ddc9000 0000005b00000058
|
||||
0x000000ec025ff820: 000000b600000072 000000006ac50000
|
||||
0x000000ec025ff830: 00000000000000b6 0000000000000000
|
||||
0x000000ec025ff840: 0000000000000000 0000000000000072
|
||||
0x000000ec025ff850: 000000ec025ff9c0 0000000000000000
|
||||
0x000000ec025ff860: 0000000000000000 000000ec025ff9c8
|
||||
0x000000ec025ff870: 000002d51ddc9000 000002d51d779268
|
||||
0x000000ec025ff880: 0000000000000000 000000006abef64f
|
||||
0x000000ec025ff890: 000000ec025ff8b0 000002d51fea87d8
|
||||
0x000000ec025ff8a0: 000002d507c50a01 000002d51d779268
|
||||
|
||||
Instructions: (pc=0x000000006abea148)
|
||||
0x000000006abea128: 10 84 d2 74 0b 41 8b 45 31 f7 d0 48 63 c8 eb 05
|
||||
0x000000006abea138: 41 0f b7 4d 31 4c 8b 6d 67 48 c1 e1 05 49 03 c8
|
||||
0x000000006abea148: 48 8b 49 10 44 8b 75 f3 0f b6 c1 66 c1 e0 08 66
|
||||
0x000000006abea158: c1 e9 08 66 0b c1 66 41 89 44 24 01 84 d2 0f 84
|
||||
|
||||
|
||||
Register to memory mapping:
|
||||
|
||||
RAX=0x000002d51d7786d0 is pointing into metadata
|
||||
RBX=0x0000000000000003 is an unknown value
|
||||
RCX=0x000002d51d8e2008 is an unknown value
|
||||
RDX=0x000002d51d779200 is pointing into metadata
|
||||
RSP=0x000000ec025ff7b0 is pointing into the stack for thread: 0x000002d51ddc9000
|
||||
RBP=0x000000ec025ff829 is pointing into the stack for thread: 0x000002d51ddc9000
|
||||
RSI=0x00000000000000b6 is an unknown value
|
||||
RDI={method} {0x000002d51d779270} 'test' '()V' in 'com/markilue/leecode/listnode/MyLinkedList'
|
||||
R8 =0x000002d51d779aa8 is pointing into metadata
|
||||
R9 =0x00007ffedf820000 is an unknown value
|
||||
R10=0x000002d51d779211 is pointing into metadata
|
||||
R11=0x000002d51fea87d9 is an unknown value
|
||||
R12=0x000002d51fea87d8 is an unknown value
|
||||
R13=0x000000ec025ff8b0 is pointing into the stack for thread: 0x000002d51ddc9000
|
||||
R14=0x000000000000005b is an unknown value
|
||||
R15=0x00000000000000b6 is an unknown value
|
||||
|
||||
|
||||
Stack: [0x000000ec02500000,0x000000ec02600000], sp=0x000000ec025ff7b0, free space=1021k
|
||||
Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
|
||||
V [jvm.dll+0x19a148]
|
||||
V [jvm.dll+0x19f64f]
|
||||
V [jvm.dll+0x3408eb]
|
||||
C [jdwp.dll+0x4296]
|
||||
C [jdwp.dll+0xef91]
|
||||
C [jdwp.dll+0x1f4f5]
|
||||
C [jdwp.dll+0x1f45e]
|
||||
V [jvm.dll+0x1ba3aa]
|
||||
V [jvm.dll+0x23df22]
|
||||
V [jvm.dll+0x29253c]
|
||||
C [ucrtbase.dll+0x21bb2]
|
||||
C [KERNEL32.DLL+0x17034]
|
||||
C [ntdll.dll+0x52651]
|
||||
|
||||
|
||||
--------------- P R O C E S S ---------------
|
||||
|
||||
Java Threads: ( => current thread )
|
||||
0x000002d51fc40800 JavaThread "Service Thread" daemon [_thread_blocked, id=21716, stack(0x000000ec02c00000,0x000000ec02d00000)]
|
||||
0x000002d51fba9000 JavaThread "C1 CompilerThread3" daemon [_thread_blocked, id=14316, stack(0x000000ec02b00000,0x000000ec02c00000)]
|
||||
0x000002d51fb9e800 JavaThread "C2 CompilerThread2" daemon [_thread_blocked, id=19840, stack(0x000000ec02a00000,0x000000ec02b00000)]
|
||||
0x000002d51fb9d800 JavaThread "C2 CompilerThread1" daemon [_thread_blocked, id=16824, stack(0x000000ec02900000,0x000000ec02a00000)]
|
||||
0x000002d51fb9b000 JavaThread "C2 CompilerThread0" daemon [_thread_blocked, id=12300, stack(0x000000ec02800000,0x000000ec02900000)]
|
||||
0x000002d51faf3800 JavaThread "JDWP Command Reader" daemon [_thread_in_native, id=2456, stack(0x000000ec02700000,0x000000ec02800000)]
|
||||
0x000002d51faf0800 JavaThread "JDWP Event Helper Thread" daemon [_thread_blocked, id=9096, stack(0x000000ec02600000,0x000000ec02700000)]
|
||||
=>0x000002d51ddc9000 JavaThread "JDWP Transport Listener: dt_socket" daemon [_thread_in_vm, id=19992, stack(0x000000ec02500000,0x000000ec02600000)]
|
||||
0x000002d51ddbc000 JavaThread "Attach Listener" daemon [_thread_blocked, id=18836, stack(0x000000ec02400000,0x000000ec02500000)]
|
||||
0x000002d51dd67800 JavaThread "Signal Dispatcher" daemon [_thread_blocked, id=21684, stack(0x000000ec02300000,0x000000ec02400000)]
|
||||
0x000002d51dd39000 JavaThread "Finalizer" daemon [_thread_blocked, id=10800, stack(0x000000ec02200000,0x000000ec02300000)]
|
||||
0x000002d51dd30800 JavaThread "Reference Handler" daemon [_thread_blocked, id=21884, stack(0x000000ec02100000,0x000000ec02200000)]
|
||||
0x000002d507b9a800 JavaThread "main" [_thread_blocked, id=16780, stack(0x000000ec01700000,0x000000ec01800000)]
|
||||
|
||||
Other Threads:
|
||||
0x000002d51dd06800 VMThread [stack: 0x000000ec02000000,0x000000ec02100000] [id=3664]
|
||||
0x000002d51fc59000 WatcherThread [stack: 0x000000ec02d00000,0x000000ec02e00000] [id=15732]
|
||||
|
||||
VM state:not at safepoint (normal execution)
|
||||
|
||||
VM Mutex/Monitor currently owned by a thread: None
|
||||
|
||||
heap address: 0x0000000081c00000, size: 2020 MB, Compressed Oops mode: 32-bit
|
||||
Narrow klass base: 0x0000000000000000, Narrow klass shift: 3
|
||||
Compressed class space size: 1073741824 Address: 0x0000000100000000
|
||||
|
||||
Heap:
|
||||
PSYoungGen total 38400K, used 10018K [0x00000000d5f00000, 0x00000000d8980000, 0x0000000100000000)
|
||||
eden space 33280K, 30% used [0x00000000d5f00000,0x00000000d68c8bd8,0x00000000d7f80000)
|
||||
from space 5120K, 0% used [0x00000000d8480000,0x00000000d8480000,0x00000000d8980000)
|
||||
to space 5120K, 0% used [0x00000000d7f80000,0x00000000d7f80000,0x00000000d8480000)
|
||||
ParOldGen total 87552K, used 0K [0x0000000081c00000, 0x0000000087180000, 0x00000000d5f00000)
|
||||
object space 87552K, 0% used [0x0000000081c00000,0x0000000081c00000,0x0000000087180000)
|
||||
Metaspace used 5032K, capacity 5348K, committed 5504K, reserved 1056768K
|
||||
class space used 579K, capacity 595K, committed 640K, reserved 1048576K
|
||||
|
||||
Card table byte_map: [0x000002d518910000,0x000002d518d10000] byte_map_base: 0x000002d518502000
|
||||
|
||||
Marking Bits: (ParMarkBitMap*) 0x000000006b238030
|
||||
Begin Bits: [0x000002d518fc0000, 0x000002d51af50000)
|
||||
End Bits: [0x000002d51af50000, 0x000002d51cee0000)
|
||||
|
||||
Polling page: 0x000002d507cf0000
|
||||
|
||||
CodeCache: size=245760Kb used=1754Kb max_used=1771Kb free=244005Kb
|
||||
bounds [0x000002d509550000, 0x000002d5097c0000, 0x000002d518550000]
|
||||
total_blobs=523 nmethods=259 adapters=185
|
||||
compilation: enabled
|
||||
|
||||
Compilation events (10 events):
|
||||
Event: 0.989 Thread 0x000002d51fba9000 256 3 java.io.File::isInvalid (47 bytes)
|
||||
Event: 0.989 Thread 0x000002d51fba9000 nmethod 256 0x000002d5096ffdd0 code [0x000002d5096fff40, 0x000002d509700390]
|
||||
Event: 0.989 Thread 0x000002d51fb9e800 257 4 sun.misc.MetaIndex::mayContain (51 bytes)
|
||||
Event: 0.991 Thread 0x000002d51fb9b000 nmethod 243 0x000002d509704390 code [0x000002d5097045c0, 0x000002d509705850]
|
||||
Event: 0.994 Thread 0x000002d51fba9000 258 3 java.lang.Character::charCount (12 bytes)
|
||||
Event: 0.994 Thread 0x000002d51fba9000 nmethod 258 0x000002d509704010 code [0x000002d509704160, 0x000002d5097042f8]
|
||||
Event: 0.997 Thread 0x000002d51fb9e800 nmethod 257 0x000002d509706e10 code [0x000002d509706f60, 0x000002d509707498]
|
||||
Event: 1.000 Thread 0x000002d51fba9000 259 1 java.nio.Buffer::limit (5 bytes)
|
||||
Event: 1.000 Thread 0x000002d51fba9000 nmethod 259 0x000002d509703d50 code [0x000002d509703ea0, 0x000002d509703fb8]
|
||||
Event: 1.017 Thread 0x000002d51fb9d800 nmethod 253 0x000002d50970a7d0 code [0x000002d50970aa80, 0x000002d50970c1a8]
|
||||
|
||||
GC Heap History (0 events):
|
||||
No events
|
||||
|
||||
Deoptimization events (0 events):
|
||||
No events
|
||||
|
||||
Classes redefined (6 events):
|
||||
Event: 120.821 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.MyLinkedList, count=1
|
||||
Event: 120.822 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.ListNode, count=1
|
||||
Event: 164.527 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.MyLinkedList, count=2
|
||||
Event: 164.528 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.ListNode, count=2
|
||||
Event: 308.152 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.MyLinkedList, count=3
|
||||
Event: 308.152 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.ListNode, count=3
|
||||
|
||||
Internal exceptions (7 events):
|
||||
Event: 0.110 Thread 0x000002d507b9a800 Exception <a 'java/lang/NoSuchMethodError': Method sun.misc.Unsafe.defineClass(Ljava/lang/String;[BII)Ljava/lang/Class; name or signature does not match> (0x00000000d5f07cc0) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hot
|
||||
Event: 0.110 Thread 0x000002d507b9a800 Exception <a 'java/lang/NoSuchMethodError': Method sun.misc.Unsafe.prefetchRead(Ljava/lang/Object;J)V name or signature does not match> (0x00000000d5f07fa8) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\
|
||||
Event: 0.817 Thread 0x000002d507b9a800 Exception <a 'java/io/FileNotFoundException'> (0x00000000d62ad468) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jni.cpp, line 710]
|
||||
Event: 0.848 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638d038) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
|
||||
Event: 0.848 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638d430) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
|
||||
Event: 0.849 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638fb28) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
|
||||
Event: 0.849 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638ff20) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
|
||||
|
||||
Events (10 events):
|
||||
Event: 1527.523 Executing VM operation: GetOrSetLocal
|
||||
Event: 1527.523 Executing VM operation: GetOrSetLocal done
|
||||
Event: 1559.202 Executing VM operation: ChangeBreakpoints
|
||||
Event: 1559.202 Executing VM operation: ChangeBreakpoints done
|
||||
Event: 1559.805 Executing VM operation: ChangeBreakpoints
|
||||
Event: 1559.805 Executing VM operation: ChangeBreakpoints done
|
||||
Event: 1573.008 Executing VM operation: ChangeBreakpoints
|
||||
Event: 1573.008 Executing VM operation: ChangeBreakpoints done
|
||||
Event: 1581.175 Executing VM operation: ChangeBreakpoints
|
||||
Event: 1581.176 Executing VM operation: ChangeBreakpoints done
|
||||
|
||||
|
||||
Dynamic libraries:
|
||||
0x00007ff61b6b0000 - 0x00007ff61b6f7000 E:\Java\JDK8\bin\java.exe
|
||||
0x00007ffee8f90000 - 0x00007ffee9188000 C:\WINDOWS\SYSTEM32\ntdll.dll
|
||||
0x00007ffee7cf0000 - 0x00007ffee7dad000 C:\WINDOWS\System32\KERNEL32.DLL
|
||||
0x00007ffee6a20000 - 0x00007ffee6cee000 C:\WINDOWS\System32\KERNELBASE.dll
|
||||
0x00007ffee8050000 - 0x00007ffee80fe000 C:\WINDOWS\System32\ADVAPI32.dll
|
||||
0x00007ffee7560000 - 0x00007ffee75fe000 C:\WINDOWS\System32\msvcrt.dll
|
||||
0x00007ffee8dd0000 - 0x00007ffee8e6c000 C:\WINDOWS\System32\sechost.dll
|
||||
0x00007ffee8990000 - 0x00007ffee8ab5000 C:\WINDOWS\System32\RPCRT4.dll
|
||||
0x00007ffee7eb0000 - 0x00007ffee8050000 C:\WINDOWS\System32\USER32.dll
|
||||
0x00007ffee6880000 - 0x00007ffee68a2000 C:\WINDOWS\System32\win32u.dll
|
||||
0x00007ffee8170000 - 0x00007ffee819a000 C:\WINDOWS\System32\GDI32.dll
|
||||
0x00007ffee6e40000 - 0x00007ffee6f4b000 C:\WINDOWS\System32\gdi32full.dll
|
||||
0x00007ffee6f50000 - 0x00007ffee6fed000 C:\WINDOWS\System32\msvcp_win.dll
|
||||
0x00007ffee6cf0000 - 0x00007ffee6df0000 C:\WINDOWS\System32\ucrtbase.dll
|
||||
0x00007ffed59c0000 - 0x00007ffed5c5a000 C:\WINDOWS\WinSxS\amd64_microsoft.windows.common-controls_6595b64144ccf1df_6.0.19041.1110_none_60b5254171f9507e\COMCTL32.dll
|
||||
0x00007ffee88f0000 - 0x00007ffee8920000 C:\WINDOWS\System32\IMM32.DLL
|
||||
0x00007ffedf820000 - 0x00007ffedf835000 E:\Java\JDK8\jre\bin\vcruntime140.dll
|
||||
0x00007ffebd920000 - 0x00007ffebd9bb000 E:\Java\JDK8\jre\bin\msvcp140.dll
|
||||
0x000000006aa50000 - 0x000000006b2b0000 E:\Java\JDK8\jre\bin\server\jvm.dll
|
||||
0x00007ffee7ce0000 - 0x00007ffee7ce8000 C:\WINDOWS\System32\PSAPI.DLL
|
||||
0x00007ffed2a20000 - 0x00007ffed2a29000 C:\WINDOWS\SYSTEM32\WSOCK32.dll
|
||||
0x00007ffed5e10000 - 0x00007ffed5e37000 C:\WINDOWS\SYSTEM32\WINMM.dll
|
||||
0x00007ffedfb10000 - 0x00007ffedfb1a000 C:\WINDOWS\SYSTEM32\VERSION.dll
|
||||
0x00007ffee8100000 - 0x00007ffee816b000 C:\WINDOWS\System32\WS2_32.dll
|
||||
0x00007ffee4f90000 - 0x00007ffee4fa2000 C:\WINDOWS\SYSTEM32\kernel.appcore.dll
|
||||
0x00007ffee0460000 - 0x00007ffee0470000 E:\Java\JDK8\jre\bin\verify.dll
|
||||
0x00007ffedf7f0000 - 0x00007ffedf81b000 E:\Java\JDK8\jre\bin\java.dll
|
||||
0x00007ffedb7d0000 - 0x00007ffedb806000 E:\Java\JDK8\jre\bin\jdwp.dll
|
||||
0x00007ffee1c90000 - 0x00007ffee1c99000 E:\Java\JDK8\jre\bin\npt.dll
|
||||
0x00007ffedf8f0000 - 0x00007ffedf920000 E:\Java\JDK8\jre\bin\instrument.dll
|
||||
0x00007ffedef10000 - 0x00007ffedef28000 E:\Java\JDK8\jre\bin\zip.dll
|
||||
0x00007ffee81a0000 - 0x00007ffee88e4000 C:\WINDOWS\System32\SHELL32.dll
|
||||
0x00007ffee4790000 - 0x00007ffee4f24000 C:\WINDOWS\SYSTEM32\windows.storage.dll
|
||||
0x00007ffee6ff0000 - 0x00007ffee7344000 C:\WINDOWS\System32\combase.dll
|
||||
0x00007ffee6110000 - 0x00007ffee6140000 C:\WINDOWS\SYSTEM32\Wldp.dll
|
||||
0x00007ffee7a70000 - 0x00007ffee7b1d000 C:\WINDOWS\System32\SHCORE.dll
|
||||
0x00007ffee7e30000 - 0x00007ffee7e85000 C:\WINDOWS\System32\shlwapi.dll
|
||||
0x00007ffee65f0000 - 0x00007ffee660f000 C:\WINDOWS\SYSTEM32\profapi.dll
|
||||
0x00007ffedf850000 - 0x00007ffedf85a000 E:\Java\JDK8\jre\bin\dt_socket.dll
|
||||
0x00007ffee5e70000 - 0x00007ffee5eda000 C:\WINDOWS\system32\mswsock.dll
|
||||
0x00007ffee44a0000 - 0x00007ffee4684000 C:\WINDOWS\SYSTEM32\dbghelp.dll
|
||||
0x00007ffee68e0000 - 0x00007ffee6962000 C:\WINDOWS\System32\bcryptPrimitives.dll
|
||||
|
||||
VM Arguments:
|
||||
jvm_args: -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:5541,suspend=y,server=n -ea -Didea.test.cyclic.buffer.size=1048576 -javaagent:C:\Users\marklue\AppData\Local\JetBrains\IntelliJIdea2021.1\captureAgent\debugger-agent.jar -Dfile.encoding=UTF-8
|
||||
java_command: com.intellij.rt.junit.JUnitStarter -ideVersion5 -junit4 com.markilue.leecode.listnode.MyLinkedList,test
|
||||
java_class_path (initial): D:\software\JetBrains\IntelliJ IDEA 2021.1\lib\idea_rt.jar;D:\software\JetBrains\IntelliJ IDEA 2021.1\plugins\junit\lib\junit5-rt.jar;D:\software\JetBrains\IntelliJ IDEA 2021.1\plugins\junit\lib\junit-rt.jar;E:\Java\JDK8\jre\lib\charsets.jar;E:\Java\JDK8\jre\lib\deploy.jar;E:\Java\JDK8\jre\lib\ext\access-bridge-64.jar;E:\Java\JDK8\jre\lib\ext\cldrdata.jar;E:\Java\JDK8\jre\lib\ext\dnsns.jar;E:\Java\JDK8\jre\lib\ext\jaccess.jar;E:\Java\JDK8\jre\lib\ext\jfxrt.jar;E:\Java\JDK8\jre\lib\ext\localedata.jar;E:\Java\JDK8\jre\lib\ext\nashorn.jar;E:\Java\JDK8\jre\lib\ext\sunec.jar;E:\Java\JDK8\jre\lib\ext\sunjce_provider.jar;E:\Java\JDK8\jre\lib\ext\sunmscapi.jar;E:\Java\JDK8\jre\lib\ext\sunpkcs11.jar;E:\Java\JDK8\jre\lib\ext\zipfs.jar;E:\Java\JDK8\jre\lib\javaws.jar;E:\Java\JDK8\jre\lib\jce.jar;E:\Java\JDK8\jre\lib\jfr.jar;E:\Java\JDK8\jre\lib\jfxswt.jar;E:\Java\JDK8\jre\lib\jsse.jar;E:\Java\JDK8\jre\lib\management-agent.jar;E:\Java\JDK8\jre\lib\plugin.jar;E:\Java\JDK8\jre\lib\resources.jar;E:\Java\JDK8\jre\lib\rt.jar;D:\example\self_example\Leecode\target\classes;E:\maven\apache-maven-3.5.4-bin\RepMaven\org\projectlombok\lombok\1.18.24\lombok-1.18.24.jar;E:\maven\apache-maven-3.5.4-bin\RepMaven\junit\junit\4.13.2\junit-4.13.2.jar;E:\maven\apache-maven-3.5.4-bin\RepMaven\org\hamcrest\hamcrest-core\1.3\hamcrest-core-1.3.jar;C:\Users\marklue\AppData\Local\JetBrains\IntelliJIdea2021.1\captureAgent\debugger-agent.jar
|
||||
Launcher Type: SUN_STANDARD
|
||||
|
||||
Environment Variables:
|
||||
JAVA_HOME=E:\Java\JDK8
|
||||
PATH=C:\WINDOWS\system32;C:\WINDOWS;C:\WINDOWS\System32\Wbem;C:\WINDOWS\System32\WindowsPowerShell\v1.0\;C:\WINDOWS\System32\OpenSSH\;D:\software\RAR½âѹ¹¤¾ß\Bandizip\;D:\;oftware\nodejs\;E:\Java\JDK8\bin;E:\maven\apache-maven-3.5.4-bin\apache-maven-3.5.4\bin;E:\scala\scala-2.12.11\bin;D:\software\anaconda\pkgs\python-3.7.11-h6244533_0;D:\software\anaconda\Scripts;D:\software\Git\Git\cmd;D:\software\nodejs;C:\Users\marklue\AppData\Local\Microsoft\WindowsApps;C:\Users\marklue\AppData\Roaming\npm;D:\software\JetBrains\PyCharm 2020.1\bin;
|
||||
USERNAME=marklue
|
||||
OS=Windows_NT
|
||||
PROCESSOR_IDENTIFIER=Intel64 Family 6 Model 142 Stepping 10, GenuineIntel
|
||||
|
||||
|
||||
|
||||
--------------- S Y S T E M ---------------
|
||||
|
||||
OS: Windows 10.0 , 64 bit Build 19041 (10.0.19041.1806)
|
||||
|
||||
CPU:total 8 (initial active 8) (4 cores per cpu, 2 threads per core) family 6 model 142 stepping 10, cmov, cx8, fxsr, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, popcnt, avx, avx2, aes, clmul, erms, 3dnowpref, lzcnt, ht, tsc, tscinvbit, bmi1, bmi2, adx
|
||||
|
||||
Memory: 4k page, physical 8272104k(2053676k free), swap 11902816k(1707060k free)
|
||||
|
||||
vm_info: Java HotSpot(TM) 64-Bit Server VM (25.311-b11) for windows-amd64 JRE (1.8.0_311-b11), built on Sep 27 2021 05:15:14 by "java_re" with MS VC++ 15.9 (VS2017)
|
||||
|
||||
time: Mon Sep 5 12:31:48 2022
|
||||
timezone: 中国标准时间
|
||||
elapsed time: 1581.277238 seconds (0d 0h 26m 21s)
|
||||
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
package com.markilue.leecode;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-01 17:36
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Test1 {
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -137,4 +137,62 @@ public class LC_1254_ClosedIsland {
|
|||
return dfs(grid, i - 1, j) & dfs(grid, i + 1, j) & dfs(grid, i, j - 1) & dfs(grid, i, j + 1);
|
||||
}
|
||||
|
||||
|
||||
//如何为封闭岛屿:有边界
|
||||
public int closedIsland3(int[][] grid) {
|
||||
|
||||
int result = 0;
|
||||
for (int i = 0; i < grid.length; i++) {
|
||||
for (int j = 0; j < grid[0].length; j++) {
|
||||
if (grid[i][j] == 0) {
|
||||
if (find(grid, i, j)) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean find(int[][] grid, int i, int j) {
|
||||
if (i < 0 || j < 0 || i >= grid.length || j >= grid[0].length) {
|
||||
return false;//碰到了边界还没有返回,则不是封闭岛屿
|
||||
}
|
||||
if (grid[i][j] == 1 || grid[i][j] == 2) {
|
||||
//遇到了边界
|
||||
return true;
|
||||
}
|
||||
grid[i][j] = 2;
|
||||
return find(grid, i + 1, j) & find(grid, i - 1, j) & find(grid, i, j + 1) & find(grid, i, j - 1);
|
||||
}
|
||||
|
||||
|
||||
public int closedIsland4(int[][] grid) {
|
||||
int result = 0;
|
||||
for (int i = 0; i < grid.length; i++) {
|
||||
for (int j = 0; j < grid[0].length; j++) {
|
||||
if (grid[i][j] == 0 && dfs2(grid, i, j)) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
private boolean dfs2(int[][] grid, int i, int j) {
|
||||
//碰到1则为封闭岛屿;超出边界则为非封闭岛屿
|
||||
if (i < 0 || j < 0 || i >= grid.length || j >= grid[0].length) {
|
||||
return false;
|
||||
}
|
||||
if (grid[i][j] == 1) {
|
||||
return true;
|
||||
}
|
||||
grid[i][j] = 1;
|
||||
|
||||
|
||||
return dfs2(grid, i + 1, j) & dfs2(grid, i, j + 1) & dfs2(grid, i - 1, j) & dfs2(grid, i, j - 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -143,6 +143,7 @@ public class LC_127_LadderLength {
|
|||
adjacent.add(new ArrayList<>());
|
||||
for (int i = 0; i < wordList.size(); ++i) {
|
||||
String s = wordList.get(i);
|
||||
//构建图
|
||||
for (int j = i + 1; j < wordList.size(); ++j) {
|
||||
if (judge(s, wordList.get(j))) {
|
||||
adjacent.get(i).add(j);
|
||||
|
|
@ -153,6 +154,7 @@ public class LC_127_LadderLength {
|
|||
return bfs(wordList.size() - 1, endIndex, adjacent, new boolean[wordList.size()]);
|
||||
}
|
||||
|
||||
//i为开始寻找的单词的index;j为结束的单词的index
|
||||
private int bfs(int i, int j, List<List<Integer>> adjacent, boolean[] visited) {
|
||||
int distance = 0;
|
||||
ArrayDeque<Integer> queue = new ArrayDeque<>();
|
||||
|
|
@ -163,8 +165,8 @@ public class LC_127_LadderLength {
|
|||
for (int k = 0; k < size; ++k) {
|
||||
int v = queue.pollFirst();
|
||||
visited[v] = true;
|
||||
if (v == j) return distance;
|
||||
List<Integer> edges = adjacent.get(v);
|
||||
if (v == j) return distance;//找到了
|
||||
List<Integer> edges = adjacent.get(v);//获取他的子矩阵挨个遍历
|
||||
for (int e : edges) {
|
||||
if (!visited[e]) {
|
||||
queue.addLast(e);
|
||||
|
|
|
|||
|
|
@ -78,4 +78,26 @@ public class LC_82_DeleteDuplicatesII {
|
|||
}
|
||||
|
||||
|
||||
public ListNode deleteDuplicates3(ListNode head) {
|
||||
if (head == null) return null;
|
||||
ListNode fake = new ListNode();
|
||||
fake.next = head;
|
||||
ListNode temp = fake;
|
||||
|
||||
while (temp.next != null && temp.next.next != null) {
|
||||
//只有这样,才能有重复的,才需要删除
|
||||
if (temp.next.val == temp.next.next.val) {
|
||||
ListNode tempNext = temp.next;
|
||||
while (tempNext.next != null && tempNext.val == tempNext.next.val) {
|
||||
tempNext = tempNext.next;
|
||||
}
|
||||
temp.next = tempNext.next;
|
||||
} else {
|
||||
temp = temp.next;
|
||||
}
|
||||
}
|
||||
return fake.next;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ public class LC_503_NextGreaterElements {
|
|||
stack.pop();
|
||||
}
|
||||
if (i < n) result[i] = stack.isEmpty() ? -1 : stack.peek();
|
||||
stack.push(nums[i%n]);
|
||||
stack.push(nums[i % n]);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
@ -114,4 +114,25 @@ public class LC_503_NextGreaterElements {
|
|||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public int[] nextGreaterElements3(int[] nums) {
|
||||
|
||||
int n = nums.length;
|
||||
ArrayDeque<Integer> stack = new ArrayDeque<>();
|
||||
|
||||
int[] result = new int[n];
|
||||
|
||||
for (int i = n * 2 - 2; i >= 0; i--) {
|
||||
while (!stack.isEmpty() && stack.peek() <= nums[i % n]) {//寻找第一个比当前数大的数
|
||||
stack.pop();
|
||||
}
|
||||
if (i < n) result[i] = stack.isEmpty() ? -1 : stack.peek();
|
||||
stack.push(nums[i % n]);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package com.markilue.leecode.hot100.interviewHot.union_find.second;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
|
|
@ -12,6 +14,12 @@ import java.util.ArrayList;
|
|||
*/
|
||||
public class LC_685_FindRedundantConnection {
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
int[][] edges = {{1, 2}, {1, 3}, {2, 3}};
|
||||
System.out.println(findRedundantDirectedConnection(edges));
|
||||
}
|
||||
|
||||
int[] father;//父节点
|
||||
int n;//父节点个数
|
||||
|
||||
|
|
@ -84,7 +92,7 @@ public class LC_685_FindRedundantConnection {
|
|||
|
||||
ArrayList<Integer> twoDegree = new ArrayList<>();
|
||||
//判断入度为2的节点,该节点一定有子节点需要删除;反向遍历,因为后面的删除优先级更高
|
||||
for (int i = edges.length-1; i >=0; i--) {
|
||||
for (int i = edges.length - 1; i >= 0; i--) {
|
||||
if (inDegree[edges[i][1]] == 2) {
|
||||
twoDegree.add(i);//这个节点需要删除
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,109 @@
|
|||
package com.markilue.leecode.hot100.interviewHot.union_find.second;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.hot100.interviewHot.union_find.second
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-12 09:58
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class LC_685_FindRedundantConnection1 {
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
int[][] edges = {{1, 2}, {1, 3}, {2, 3}};
|
||||
System.out.println(findRedundantDirectedConnection(edges));
|
||||
}
|
||||
|
||||
int[] father;
|
||||
|
||||
private void init(int[] father) {
|
||||
for (int i = 0; i < father.length; i++) {
|
||||
father[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
private int find(int u) {
|
||||
if (father[u] == u) return u;
|
||||
father[u] = find(father[u]);
|
||||
return father[u];
|
||||
}
|
||||
|
||||
private void union(int u, int v) {
|
||||
u = find(u);
|
||||
v = find(v);
|
||||
if (u == v) return;
|
||||
father[v] = u;
|
||||
}
|
||||
|
||||
private boolean same(int u, int v) {
|
||||
u = find(u);
|
||||
v = find(v);
|
||||
return u == v;
|
||||
}
|
||||
|
||||
private int[] removeOne(int[][] edges) {
|
||||
init(father);
|
||||
for (int[] edge : edges) {
|
||||
if (same(edge[0], edge[1])) {
|
||||
return edge;
|
||||
}
|
||||
union(edge[0], edge[1]);
|
||||
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean removeIfCan(int[][] edges, int i) {
|
||||
init(father);
|
||||
//遇上i就跳过
|
||||
for (int i1 = 0; i1 < edges.length; i1++) {
|
||||
if (i1 == i) continue;
|
||||
if (same(edges[i1][0], edges[i1][1])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
union(edges[i1][0], edges[i1][1]);
|
||||
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//虽然是有向图,但是一共就三种情况,两种入度为2的情况可以直接判断出来,最后可以转为无向图的情况
|
||||
public int[] findRedundantDirectedConnection(int[][] edges) {
|
||||
father = new int[1010];
|
||||
//判断入度为2的情况
|
||||
int[] countDegree = new int[1010];
|
||||
|
||||
for (int[] edge : edges) {
|
||||
countDegree[edge[1]]++;
|
||||
}
|
||||
|
||||
ArrayList<Integer> twoDegree = new ArrayList<>();//入度为2的节点
|
||||
|
||||
for (int i = edges.length - 1; i >= 0; i--) {
|
||||
if (countDegree[edges[i][1]] > 1) twoDegree.add(i);
|
||||
}
|
||||
|
||||
if (!twoDegree.isEmpty()) {
|
||||
if (removeIfCan(edges, twoDegree.get(0))) {
|
||||
return edges[twoDegree.get(0)];
|
||||
} else {
|
||||
return edges[twoDegree.get(1)];
|
||||
}
|
||||
}
|
||||
|
||||
//只用删除一个
|
||||
return removeOne(edges);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -21,7 +21,7 @@ public class T34_75_SortColors {
|
|||
@Test
|
||||
public void test() {
|
||||
int[] nums = {2, 0, 2, 1, 1, 0};
|
||||
sortColors1(nums);
|
||||
sortColors3(nums);
|
||||
System.out.println(Arrays.toString(nums));
|
||||
}
|
||||
|
||||
|
|
@ -99,4 +99,28 @@ public class T34_75_SortColors {
|
|||
|
||||
|
||||
}
|
||||
|
||||
|
||||
//三刷
|
||||
public void sortColors3(int[] nums) {
|
||||
|
||||
int index0 = 0;
|
||||
int index1 = 0;
|
||||
|
||||
for (int i = 0; i < nums.length; i++) {
|
||||
if (nums[i] == 0) {
|
||||
swap(nums, index0, i);
|
||||
if (index0 < index1) {//交换到1了,交换回来
|
||||
swap(nums, index1, i);
|
||||
}
|
||||
index1++;
|
||||
index0++;
|
||||
} else if (nums[i] == 1) {
|
||||
swap(nums, index1, i);
|
||||
index1++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,4 +99,22 @@ public class T49_124_MaxPathSum {
|
|||
return Math.max(left, right) + root.val;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public int maxPathSum3(TreeNode root) {
|
||||
findCurMax(root);
|
||||
return maxSum;
|
||||
}
|
||||
|
||||
//返回要当前节点的最大值;不要的情况已经在子节点中讨论过了
|
||||
public int findCurMax(TreeNode node) {
|
||||
if (node == null) {
|
||||
return 0;
|
||||
}
|
||||
int left = Math.max(findCurMax(node.left), 0);
|
||||
int right = Math.max(findCurMax(node.right), 0);
|
||||
|
||||
maxSum = Math.max(maxSum, left + right + node.val);
|
||||
return Math.max(left, right) + node.val;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -203,3 +203,106 @@ class LRUCache {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Least-recently-used cache backed by a hash map plus a doubly linked list with sentinel
 * head/tail nodes. The node right after {@code head} is the most recently used entry, the
 * node right before {@code tail} is the eviction candidate.
 */
class LRUCache1 {

    public static void main(String[] args) {
        LRUCache1 lRUCache = new LRUCache1(2);
        lRUCache.put(1, 1); // cache is {1=1}
        lRUCache.put(2, 2); // cache is {1=1, 2=2}
        System.out.println(lRUCache.get(1));    // returns 1
        lRUCache.put(3, 3); // evicts key 2, cache is {1=1, 3=3}
        System.out.println(lRUCache.get(2));    // returns -1 (not found)
        lRUCache.put(4, 4); // evicts key 1, cache is {4=4, 3=3}
        System.out.println(lRUCache.get(1));    // returns -1 (not found)
        System.out.println(lRUCache.get(3));    // returns 3
        System.out.println(lRUCache.get(4));    // returns 4
    }

    Map<Integer, Node> cache;
    int capacity;
    int size;
    Node head;
    Node tail;

    /**
     * @param capacity maximum number of entries; a value of 0 or less yields a cache that
     *                 never stores anything
     */
    public LRUCache1(int capacity) {
        this.capacity = capacity;
        cache = new HashMap<>();
        size = 0;
        head = new Node();
        tail = new Node();
        head.next = tail;
        tail.pre = head;
    }

    /**
     * Looks up a key and marks it as most recently used.
     *
     * @return the stored value, or -1 when the key is absent
     */
    public int get(int key) {
        Node node = cache.get(key);
        if (node == null) {
            return -1;
        }
        moveToHead(node);
        return node.value;
    }

    /**
     * Inserts or updates a key, evicting the least recently used entry when full.
     */
    public void put(int key, int value) {
        // Without this guard a zero-capacity cache would try to evict the head sentinel
        // and fail with a NullPointerException.
        if (capacity <= 0) {
            return;
        }

        Node node = cache.get(key);
        if (node != null) {
            // Existing key: refresh value and recency.
            node.value = value;
            moveToHead(node);
            return;
        }

        if (size >= capacity) {
            Node eldest = tail.pre;
            cache.remove(eldest.key);
            deleteNode(eldest);
            size--;
        }
        Node newNode = new Node(key, value);
        addToHead(newNode);
        cache.put(key, newNode);
        size++;
    }

    /** Unlinks a node that is currently in the list. */
    private void deleteNode(Node node) {
        node.pre.next = node.next;
        node.next.pre = node.pre;
    }

    /** Links a detached node directly after the head sentinel. */
    private void addToHead(Node node) {
        node.next = head.next;
        head.next.pre = node;
        head.next = node;
        node.pre = head;
    }

    /** Moves a node that is already in the list to the most-recently-used position. */
    private void moveToHead(Node node) {
        deleteNode(node);
        addToHead(node);
    }

    /** Doubly linked list node carrying one cache entry. */
    class Node {

        int key;
        int value;
        Node pre;
        Node next;

        public Node() {
        }

        public Node(int key, int value) {
            this.key = key;
            this.value = value;
        }

    }
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ public class T67_221_MaximalSquare {
|
|||
{'1', '1', '1', '1', '1'},
|
||||
{'1', '0', '0', '1', '0'}
|
||||
};
|
||||
System.out.println(maximalSquare1(matrix));
|
||||
System.out.println(maximalSquare2(matrix));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -100,4 +100,34 @@ public class T67_221_MaximalSquare {
|
|||
return result * result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public int maximalSquare2(char[][] matrix) {
|
||||
int m = matrix.length;
|
||||
int n = matrix[0].length;
|
||||
int[][] dp = new int[m][n];
|
||||
int result = Integer.MIN_VALUE;
|
||||
for (int i = 0; i < n; i++) {
|
||||
dp[0][i] = matrix[0][i] == '1' ? 1 : 0;
|
||||
result = Math.max(result,dp[0][i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < m; i++) {
|
||||
dp[i][0] = matrix[i][0] == '1' ? 1 : 0;
|
||||
result = Math.max(result,dp[0][i]);
|
||||
}
|
||||
|
||||
for (int i = 1; i < m; i++) {
|
||||
for (int j = 1; j < n; j++) {
|
||||
if (matrix[i][j] == '1') {
|
||||
dp[i][j] = Math.min(Math.min(dp[i - 1][j], dp[i][j - 1]), dp[i - 1][j - 1]) + 1;
|
||||
}
|
||||
if (result < dp[i][j]) result = dp[i][j];
|
||||
}
|
||||
}
|
||||
return result * result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,4 +33,22 @@ public class T69_234_IsPalindrome {
|
|||
return false;
|
||||
|
||||
}
|
||||
|
||||
public boolean isPalindrome1(ListNode head) {
|
||||
root = head;
|
||||
return find(head);
|
||||
}
|
||||
|
||||
public boolean find(ListNode node) {
|
||||
if (node == null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (find(node.next) && node.val == root.val) {
|
||||
root = root.next;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class T72_239_MaxSlidingWindow {
|
|||
public void test() {
|
||||
int[] nums = {1, 3, -1, -3, 5, 3, 6, 7};
|
||||
int k = 3;
|
||||
System.out.println(Arrays.toString(maxSlidingWindow1(nums, k)));
|
||||
System.out.println(Arrays.toString(maxSlidingWindow2(nums, k)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -110,4 +110,37 @@ public class T72_239_MaxSlidingWindow {
|
|||
}
|
||||
|
||||
|
||||
//构造一个单调栈,只有后面的值比前面的值小,前面的值就是无用的
|
||||
public int[] maxSlidingWindow2(int[] nums, int k) {
|
||||
|
||||
ArrayDeque<Integer> stack = new ArrayDeque<>();
|
||||
|
||||
int[] result = new int[nums.length - k + 1];
|
||||
|
||||
//构造第一个窗口
|
||||
for (int i = 0; i < k; i++) {
|
||||
while (!stack.isEmpty() && nums[stack.peekLast()] <= nums[i]) {
|
||||
stack.pollLast();
|
||||
}
|
||||
stack.offerLast(i);
|
||||
}
|
||||
result[0] = nums[stack.peekFirst()];
|
||||
|
||||
for (int i = k; i < nums.length; i++) {
|
||||
//先排除过期元素
|
||||
while (!stack.isEmpty() && i - stack.peekFirst() >= k) {
|
||||
stack.pollFirst();
|
||||
}
|
||||
//添加在合适的位置
|
||||
while (!stack.isEmpty() && nums[stack.peekLast()] <= nums[i]) {
|
||||
stack.pollLast();
|
||||
}
|
||||
stack.offerLast(i);
|
||||
result[i - k + 1] = stack.isEmpty() ? -1 : nums[stack.peekFirst()];
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,11 +103,11 @@ public class T79_301_RemoveInvalidParentheses {
|
|||
}
|
||||
//判断完之后就需要进行删除;同时,不可能存在left,right都大于0的情况
|
||||
List<String> result = new ArrayList<>();
|
||||
remove(s, left, right, result,0);
|
||||
remove(s, left, right, result, 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void remove(String s, int left, int right, List<String> res,int start) {
|
||||
public void remove(String s, int left, int right, List<String> res, int start) {
|
||||
if (left == 0 && right == 0) {
|
||||
if (isValid(s)) {
|
||||
res.add(s);
|
||||
|
|
@ -118,17 +118,62 @@ public class T79_301_RemoveInvalidParentheses {
|
|||
//必须在删除的后面继续删才可以
|
||||
for (int i = start; i < s.length(); i++) {
|
||||
//去重
|
||||
if(i>start&&s.charAt(i)==s.charAt(i-1))continue;
|
||||
if (i > start && s.charAt(i) == s.charAt(i - 1)) continue;
|
||||
//不够了
|
||||
if (left + right > s.length() - i) return;
|
||||
if (left > 0 && s.charAt(i) == '(') {
|
||||
//可以移除左边
|
||||
remove(s.substring(0, i) + s.substring(i + 1), left - 1, right, res,i);
|
||||
remove(s.substring(0, i) + s.substring(i + 1), left - 1, right, res, i);
|
||||
}
|
||||
|
||||
if (right > 0 && s.charAt(i) == ')') {
|
||||
//可以移除左边
|
||||
remove(s.substring(0, i) + s.substring(i + 1), left, right - 1, res,i);
|
||||
remove(s.substring(0, i) + s.substring(i + 1), left, right - 1, res, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//判断左括号右括号哪边多,哪边多就把哪边的删了
|
||||
public List<String> removeInvalidParentheses2(String s) {
|
||||
|
||||
int left = 0;
|
||||
int right = 0;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char cur = s.charAt(i);
|
||||
if (cur == '(') {
|
||||
left++;
|
||||
} else if (cur == ')') {
|
||||
if (left > 0) {
|
||||
left--;
|
||||
} else {
|
||||
right++;
|
||||
}
|
||||
}
|
||||
}
|
||||
List<String> result = new ArrayList<>();
|
||||
remove(left, right, s, result, 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void remove(int left, int right, String s, List<String> result, int start) {
|
||||
if (left == 0 && right == 0) {
|
||||
if (isValid(s)) {
|
||||
result.add(new String(s));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = start; i < s.length(); i++) {
|
||||
char cur = s.charAt(i);
|
||||
if (i > start && cur == s.charAt(i - 1)) {
|
||||
continue;
|
||||
}
|
||||
if (left > 0 && cur == '(') {
|
||||
remove(left - 1, right, s.substring(0, i) + s.substring(i + 1), result, i);
|
||||
}
|
||||
if (right > 0 && cur == ')') {
|
||||
remove(left, right - 1, s.substring(0, i) + s.substring(i + 1), result, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
package com.markilue.leecode.interview.OPPO.T0411;
|
||||
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.OPPO.T0411
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-14 10:41
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question1 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
Scanner sc = new Scanner(System.in);
|
||||
String s = sc.next();
|
||||
solve(s);
|
||||
}
|
||||
|
||||
private static void solve(String s) {
|
||||
int left = 0;
|
||||
int right = 0;
|
||||
int xiaochu = 0;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char cur = s.charAt(i);
|
||||
if (cur == '(') {
|
||||
left++;
|
||||
} else if (cur == ')') {
|
||||
if (left > 0) {
|
||||
left--;
|
||||
xiaochu++;
|
||||
} else {
|
||||
right++;
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println(s.length() - xiaochu);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
package com.markilue.leecode.interview.OPPO.T0411;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.OPPO.T0411
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-14 11:02
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question2 {

    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int n = sc.nextInt();
        solve(n);
    }

    /**
     * Prints the answer for n, built from factorials of n / 2 modulo {@link #mod}.
     * NOTE(review): the problem statement is not part of this file, so the formula is kept
     * exactly as written; only the crash for n == 1 (which needs cal(0)) is fixed in cal.
     */
    private static void solve(int n) {
        if (n % 2 == 0) {
            if (n == 2) {
                System.out.println(2);
                return;
            }
            System.out.println(cal(n / 2) * 2 * 2 % mod);
        } else {
            System.out.println(cal(n / 2 + 1) * cal(n / 2) % mod);
        }
    }

    // Cache of factorials modulo mod; 0 marks "not computed yet". Grown on demand by cal.
    static long[] memo = new long[(int) 1e5];
    static long mod = (long) (1e9 + 7);

    /**
     * Computes n! modulo {@link #mod}, caching intermediate results in {@link #memo}.
     *
     * <p>Iterative so that large n cannot overflow the call stack; 0! is defined as 1.
     *
     * @param n non-negative argument
     * @return n! mod {@code mod}
     * @throws IllegalArgumentException if n is negative
     */
    public static long cal(int n) {
        if (n < 0) {
            throw new IllegalArgumentException("n must be non-negative: " + n);
        }
        if (n <= 1) {
            return 1;
        }
        if (n >= memo.length) {
            memo = Arrays.copyOf(memo, n + 1);
        }
        if (memo[n] != 0) {
            return memo[n];
        }

        // Resume from the largest already-cached factorial below n (or from 1! = 1).
        int start = n - 1;
        while (start > 1 && memo[start] == 0) {
            start--;
        }
        long acc = start <= 1 ? 1 : memo[start];
        for (int i = start + 1; i <= n; i++) {
            acc = acc * i % mod;
            memo[i] = acc;
        }
        return acc;
    }

}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package com.markilue.leecode.interview.baidu.T0410;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.baidu.T0410
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-14 11:32
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question1 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner sc = new Scanner(System.in);
|
||||
int n = sc.nextInt();
|
||||
int k = sc.nextInt();
|
||||
int[] nums = new int[n];
|
||||
for (int i = 0; i < n; i++) {
|
||||
nums[i] = sc.nextInt();
|
||||
}
|
||||
solve(nums, k);
|
||||
}
|
||||
|
||||
//猜测:前k-1个单独分最小的前k-1个数;后面全在一起
|
||||
private static void solve(int[] nums, int k) {
|
||||
Arrays.sort(nums);
|
||||
|
||||
double result = 0;
|
||||
//前k-1个单独是一个
|
||||
for (int i = 0; i < k - 1; i++) {
|
||||
result += nums[i];
|
||||
}
|
||||
//后面全在一起
|
||||
double temp = 0;
|
||||
for (int i = k - 1; i < nums.length; i++) {
|
||||
temp += nums[i];
|
||||
}
|
||||
double avg = temp / (nums.length - k + 1);
|
||||
System.out.println((result+avg));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
package com.markilue.leecode.interview.huawei.T0412;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.huawei.T0412
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-12 11:38
|
||||
*@Description:
|
||||
* TODO 交易系统的降级策略:
|
||||
* 有一个核心交易系统接口被N个上游系统调用,每个上游系统的调用量R=[R1,R2.....,RN].
|
||||
* 由于核心交易系统集群故障,需要暂时系统降级限制调用,核心交易系统能接受的最大调用量为cnt。
|
||||
* 设置降级规则如下:
|
||||
* 如果sum(R1.R2..RN)小于等于cnt,则全部可以正常调用,返回-1;
|
||||
* 如果sum(R1.R2....RN)大于cnt,设置一个阈值limit,
|
||||
* 如果某个上游系统发起的调用量超过limit,就将该上游系统的调用量限制为limit,
|
||||
* 其余未达到limit的系统可以正常发起调用。
|
||||
* 求出这个最大的limit (limit可以为0)
|
||||
* 此题目对效率有要求,请选择高效的方式。
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question1 {

    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        // Split on any run of whitespace so repeated blanks do not produce empty tokens.
        int[] nums = Arrays.stream(sc.nextLine().trim().split("\\s+")).mapToInt(Integer::parseInt).toArray();
        int threshold = Integer.parseInt(sc.nextLine().trim());

        solve(nums, threshold);
    }

    /**
     * Prints the largest limit such that capping every call volume at that limit keeps the
     * total within {@code threshold}, or -1 when the uncapped total already fits.
     *
     * <p>The capped total grows monotonically with the limit, so the answer is found by
     * binary search.
     *
     * @param nums      call volume of each upstream system
     * @param threshold maximum total volume the core system accepts
     */
    private static void solve(int[] nums, int threshold) {
        if (nums.length == 0) {
            System.out.println(-1);
            return;
        }

        long total = 0;
        int largest = Integer.MIN_VALUE;
        for (int num : nums) {
            total += num;
            largest = Math.max(largest, num);
        }
        // Everything fits without any limit.
        if (total <= threshold) {
            System.out.println(-1);
            return;
        }

        // threshold / n is always feasible (n * limit <= threshold); a limit at or above the
        // largest volume caps nothing, so the answer lies in [threshold / n, largest].
        int min = threshold / nums.length;
        int max = largest;
        while (min < max) {
            // Round the midpoint up so that "min = mid" always makes progress.
            int mid = min + ((max - min + 1) >> 1);
            if (check(nums, mid, threshold)) min = mid;
            else max = mid - 1;
        }
        System.out.println(min);
    }

    /**
     * Reports whether capping every volume at {@code max} keeps the total within
     * {@code threshold}. The sum is accumulated in a long to avoid int overflow.
     */
    private static boolean check(int[] nums, int max, int threshold) {
        long result = 0;
        for (int num : nums) {
            result += Math.min(num, max);
        }
        return result <= threshold;
    }
}
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
package com.markilue.leecode.interview.huawei.T0412;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.huawei.T0412
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-13 11:24
|
||||
*@Description:
|
||||
* TODO 获取最多的食物:
|
||||
* 主办方设计了一个获取食物的游戏。
|
||||
* 游戏的地图由N个方格组成,每个方格上至多2个传送门,通过传送门可将参与者传送至指定的其它方格。
|
||||
* 同时,每个方格上标注了三个数字:
|
||||
* (1) 第一个数字id:代表方格的编号,从0到N-1,每个方格各不相同
|
||||
* (2)第二个数字parent-id:代表从编号为parent-id的方格可以通过传送门传送到当前方格(-1则表示没有任何方格可以通过传送门传送到此方格,这样的方格在地图中有且仅有一个)
|
||||
* (3)第三个数字value: 取值在[100,100]的整数值,正整数代表参与者得到相队取值单位的食物,负整数代表失去相应数值单位的食物(参与者可能存在临时持有食物为负数的情况),0则代表无变化。
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question2 {
|
||||
|
||||
static int max = Integer.MIN_VALUE;
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner sc = new Scanner(System.in);
|
||||
int num = sc.nextInt();
|
||||
List<List<Node>> edges = new ArrayList<>();
|
||||
HashMap<Integer, Integer> map = new HashMap<>();//<id,value>
|
||||
for (int i = 0; i < num; i++) {
|
||||
edges.add(new ArrayList<>());
|
||||
}
|
||||
for (int i = 0; i < num; i++) {
|
||||
int id = sc.nextInt();
|
||||
int parentId = sc.nextInt();
|
||||
int value = sc.nextInt();
|
||||
map.put(id, value);
|
||||
if (parentId != -1) {
|
||||
edges.get(parentId).add(new Node(id, value));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
solve(edges, map, 0, i);
|
||||
}
|
||||
System.out.println(max);
|
||||
}
|
||||
|
||||
public static void solve(List<List<Node>> edges, Map<Integer, Integer> map, int curValue, int curIndex) {
|
||||
|
||||
curValue += map.get(curIndex);
|
||||
max = Math.max(max, curValue);
|
||||
List<Node> children = edges.get(curIndex);
|
||||
|
||||
for (Node child : children) {
|
||||
solve(edges, map, curValue, child.id);
|
||||
}
|
||||
}
|
||||
|
||||
static class Node {
|
||||
int value;
|
||||
int id;
|
||||
|
||||
|
||||
public Node() {
|
||||
}
|
||||
|
||||
public Node(int id, int value) {
|
||||
this.id = id;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package com.markilue.leecode.interview.huawei.T0531;
|
||||
|
||||
import com.markilue.leecode.tree.TreeNode;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Scanner;
|
||||
|
|
@ -38,7 +39,7 @@ public class Question3 {
|
|||
{-2, -3, 4},
|
||||
};
|
||||
// calculateMaxRectangleSum(2, 3, income);
|
||||
calculate(2, 3, income);
|
||||
calculate1(2, 3, income);
|
||||
}
|
||||
|
||||
private static void calculateMaxRectangleSum(int m, int n, int[][] matrix) {
|
||||
|
|
@ -104,4 +105,40 @@ public class Question3 {
|
|||
|
||||
}
|
||||
|
||||
|
||||
//二刷尝试:由于需要计算那一块的面积,但是不知道那一块的具体大小,所以考虑使用前缀和进行计算
|
||||
private static void calculate1(int m, int n, int[][] matrix) {
|
||||
int[][] prefix = new int[m + 1][n + 1];
|
||||
|
||||
//构造前缀和数组
|
||||
for (int i = 1; i < m + 1; i++) {
|
||||
for (int j = 1; j < n + 1; j++) {
|
||||
prefix[i][j] = prefix[i - 1][j] + prefix[i][j - 1] - prefix[i - 1][j - 1] + matrix[i - 1][j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
//挨个遍历寻找面积最大值
|
||||
int result = Integer.MIN_VALUE;
|
||||
int edge = 0;
|
||||
|
||||
for (int i = 1; i < m + 1; i++) {
|
||||
for (int j = 1; j < n + 1; j++) {//左上角
|
||||
for (int k = i; k < m + 1; k++) {
|
||||
for (int l = j; l < n + 1; l++) {//右下角
|
||||
int cur = prefix[k][l] - prefix[i - 1][l] - prefix[k][j - 1] + prefix[i - 1][j - 1];
|
||||
if (cur > result) {
|
||||
result = cur;
|
||||
edge = (k - i + 1) * (l - j + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(edge + " " + result);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,90 @@
|
|||
package com.markilue.leecode.interview.meituan.T0415;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-09 18:41
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class NestingDolls {
|
||||
|
||||
|
||||
static int n;
|
||||
static Doll[] dolls;
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner in = new Scanner(System.in);
|
||||
n = in.nextInt();
|
||||
dolls = new Doll[n];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int a = in.nextInt(), b = in.nextInt(), c = in.nextInt();
|
||||
dolls[i] = new Doll(a, b, c);
|
||||
}
|
||||
Arrays.sort(dolls);//按最大空间有小到大排序
|
||||
|
||||
int minCost = 0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!dolls[i].used) {
|
||||
int cost = insertDoll(i, dolls[i].b);
|
||||
minCost += cost;
|
||||
}
|
||||
}
|
||||
System.out.println(minCost);
|
||||
|
||||
in.close();
|
||||
}
|
||||
|
||||
// 将第 i 个套娃插入内部大小为 size 的套娃中
|
||||
static int insertDoll(int i, int size) {
|
||||
dolls[i].used = true;
|
||||
|
||||
int j = findSmallest(size);
|
||||
if (j == -1) {//没有找到能够放在里面的,需要重新开辟
|
||||
dolls[i].minSize = size;
|
||||
dolls[i].minCost = dolls[i].c * size;//由于都放不进,所以最小值就是全部的
|
||||
return dolls[i].minCost;
|
||||
} else {
|
||||
int cost = insertDoll(j, dolls[i].a) + dolls[i].c * (size - dolls[j].a);
|
||||
if (cost < dolls[i].minCost) {
|
||||
dolls[i].minSize = size- dolls[j].a;
|
||||
dolls[i].minCost = cost;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
}
|
||||
|
||||
// 寻找最小的被 size 占据的空间能够容纳的套娃
|
||||
static int findSmallest(int size) {
|
||||
int j = -1;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!dolls[i].used && dolls[i].a <= size)
|
||||
if (j == -1 || dolls[i].minSize < dolls[j].minSize) j = i;
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
static class Doll implements Comparable<Doll> {
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
boolean used = false; // 是否已经被放置
|
||||
int minSize; // 占据的最小内部空间
|
||||
int minCost; // 最小花费
|
||||
|
||||
Doll(int a, int b, int c) {
|
||||
this.a = a;
|
||||
this.b = b;
|
||||
this.c = c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Doll other) {
|
||||
return Integer.compare(a, other.a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
package com.markilue.leecode.interview.meituan.T0415;
|
||||
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-06 11:38
|
||||
*@Description:
|
||||
* TODO 字符串前缀:
|
||||
* 现在有两个字符串S和T,你需要对S进行若干次操作,使得S是T的一个前缀(空串也是一个前缀)。
|
||||
* 每次操作可以修改S的一个字符,或者删除一个S末尾的字符。小团需要写一段程序,输出最少需要操作的次数。
|
||||
*
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question1 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner sc = new Scanner(System.in);
|
||||
int count = sc.nextInt();
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
String S = sc.next();
|
||||
String T = sc.next();
|
||||
solve(S, T);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//为什么不是动态规划?因为题目要求,删只能删除S的末尾
|
||||
private static void solve(String s, String t) {
|
||||
|
||||
int result = 0;
|
||||
int pos = s.length() - 1;
|
||||
//如果S是T的前缀,则S一定要比T短
|
||||
if (s.length() > t.length()) {
|
||||
result += s.length() - t.length();
|
||||
pos = t.length() - 1;
|
||||
}
|
||||
//能修改就修改;不能修改再删除
|
||||
for (int i = pos; i >= 0; i--) {
|
||||
if (t.charAt(i) != s.charAt(i)) {
|
||||
result++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
System.out.println(result);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
package com.markilue.leecode.interview.meituan.T0415;
|
||||
|
||||
import java.util.Scanner;
|
||||
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-07 10:28
|
||||
*@Description:
|
||||
* TODO 小美分糖:
|
||||
* 某一天,小美从商店买了两种糖果,分别买了a个和b个,要分给班上n个小朋友。为了不浪费,每块糖果都得恰好分到一个小朋友。
|
||||
* 另外,两种糖果一起吃的话味道其实并不好,所以每一个小朋友都只能得到其中一种糖果。
|
||||
* 小美希望分得最少糖果的那个小朋友能得到尽量多的糖果。小美的任务是求得这个数量是多少。
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question2 {

    /**
     * Reads the number of test cases, then three integers (n, a, b) per case from
     * standard input, and prints one answer per case.
     */
    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        int cases = input.nextInt();

        for (int c = 0; c < cases; c++) {
            int n = input.nextInt();
            int a = input.nextInt();
            int b = input.nextInt();
            solve(n, a, b);
        }
    }

    /**
     * Tries every split of the n children into two groups (one group per candy type),
     * shares each candy type evenly inside its group, and prints the best achievable
     * minimum share.
     *
     * The scan stops at the first split whose minimum share is smaller than the best
     * seen so far. When {@code n < 2} no split is tried and {@code Integer.MIN_VALUE}
     * is printed.
     *
     * @param n number of children
     * @param a amount of the first candy type
     * @param b amount of the second candy type
     */
    private static void solve(int n, int a, int b) {
        // The smaller pile is always the one handed to the first `kids` children.
        int fewer = Math.min(a, b);
        int more = Math.max(a, b);

        int best = Integer.MIN_VALUE;
        int kids = 1;
        while (kids < n) {
            int candidate = Math.min(fewer / kids, more / (n - kids));
            if (candidate < best) {
                // The minimum share has started to fall; later splits are not examined.
                break;
            }
            best = candidate;
            kids++;
        }

        System.out.println(best);
    }
}
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
package com.markilue.leecode.interview.meituan.T0415;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-07 11:00
|
||||
*@Description:
|
||||
* TODO 交通规划:
|
||||
* A国有n个城市,这n个城市排成一列,依次编号为1,2,3,...,n。
|
||||
* 一开始,这n座城市之间都没有任何交通路线,于是政府打算修建一些铁路来进行交通规划。
|
||||
* 接下来T天,每一天会进行如下操作的其中一种:
|
||||
* - “L x”:表示编号为 x 的城市与其左边的城市之间修建一条铁路。如果 x 左边没有城市或者已经修建了铁路,则无视该操作;
|
||||
* - “R x”:表示编号为 x 的城市与其右边的城市之间修建一条铁路。如果 x 右边没有城市或者已经修建了铁路,则无视该操作;
|
||||
* - “Q x”:表示查询 x 往左边和往右边最远能到达的城市编号。
|
||||
* 你的任务是模拟以上操作,并对于每一条“Q x”操作,输出对应的答案。
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question3 {

    /**
     * Simulates the railway operations with a doubly linked chain of cities.
     *
     * Input (standard input): the number of cities, the number of operations, then one
     * operation per entry: {@code L x}, {@code R x} or {@code Q x} with a 1-based city
     * number. For every {@code Q x} the leftmost and rightmost reachable city numbers
     * are printed on one line, separated by a space.
     */
    public static void main(String[] args) {

        Scanner sc = new Scanner(System.in);
        int total = sc.nextInt();
        // One node per city; list index = city number - 1.
        List<Node> edge = new ArrayList<>();
        for (int i = 0; i < total; i++) {
            edge.add(new Node(i));
        }

        int count = sc.nextInt();

        for (int i = 0; i < count; i++) {
            String action = sc.next();
            int node = sc.nextInt() - 1; // city number => list index
            if (action.equals("L") && node - 1 >= 0) {
                connect(edge.get(node - 1), edge.get(node));
            } else if (action.equals("R") && node + 1 < edge.size()) {
                connect(edge.get(node), edge.get(node + 1));
            } else if (action.equals("Q")) {
                // Walk the links outwards in both directions until they end.
                Node cur = edge.get(node);
                Node tempLeft = cur;
                while (tempLeft.left != null) {
                    tempLeft = tempLeft.left;
                }
                Node tempRight = cur;
                while (tempRight.right != null) {
                    tempRight = tempRight.right;
                }
                System.out.println((tempLeft.val + 1) + " " + (tempRight.val + 1));
            }
            // "L"/"R" on a border city falls through every branch and is ignored.
        }
    }

    /**
     * Records a railway between two adjacent cities in BOTH directions.
     *
     * Setting only one side (as the code previously did) made the railway invisible to
     * a query issued from the other city. Re-linking an existing railway is a no-op.
     */
    private static void connect(Node leftCity, Node rightCity) {
        leftCity.right = rightCity;
        rightCity.left = leftCity;
    }

    /** A city in the chain; {@code left}/{@code right} are non-null only when a railway exists. */
    static class Node {
        Node left;
        Node right;
        int val;

        public Node() {
        }

        public Node(int val) {
            this.val = val;
        }
    }


    /** Union-find parent array used by {@link #solve1()}; {@code father[i] == i} marks a root. */
    int[] father;

    /** Makes every element of {@code father} its own root. */
    public void init(int[] father) {
        for (int i = 0; i < father.length; i++) {
            father[i] = i;
        }
    }

    /** Returns the root of {@code x}, compressing the path on the way back. */
    public int find(int x) {
        if (x == father[x]) return x;
        father[x] = find(father[x]);
        return father[x];
    }

    /** Merges the sets containing {@code u} and {@code v} (the root of {@code v} is attached to the root of {@code u}). */
    public void union(int u, int v) {
        u = find(u);
        v = find(v);
        if (u == v) return;
        father[v] = u;
    }


    /**
     * Union-find variant of {@link #main(String[])}: the cities form an undirected graph
     * and a query only asks about connectivity, so connected cities are merged and the
     * ends of the connected run around the queried city are located by binary search.
     *
     * Reads the same input format as {@code main} from standard input, using 1-based
     * city numbers directly as indices.
     */
    public void solve1() {
        Scanner sc = new Scanner(System.in);
        int total = sc.nextInt();
        // Indices 0 and total + 1 are sentinels so that "L 1" and "R total" stay in
        // bounds; they are never inside a query's search range.
        father = new int[total + 2];
        init(father);

        int count = sc.nextInt();

        for (int i = 0; i < count; i++) {
            String action = sc.next();
            int node = sc.nextInt();
            if (action.equals("L")) {
                union(node, node - 1);
            } else if (action.equals("R")) {
                union(node, node + 1);
            } else {
                // Left end: smallest index in [1, node] that shares node's root.
                int l = 1;
                int r = node;
                while (l < r) {
                    int mid = l + ((r - l) >> 1);
                    if (find(node) == find(mid)) r = mid; // connected: look further left
                    else l = mid + 1;
                }
                int res1 = r;

                // Right end: largest index in [node, total] that shares node's root.
                l = node;
                r = total;
                while (l < r) {
                    // Round the midpoint UP: this branch assigns l = mid, and a floor
                    // midpoint would leave l unchanged forever once r == l + 1.
                    int mid = l + ((r - l + 1) >> 1);
                    if (find(node) == find(mid)) l = mid; // connected: look further right
                    else r = mid - 1;
                }
                System.out.println(res1 + " " + r);
            }
        }
    }


}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
package com.markilue.leecode.interview.meituan.T0415;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Leecode
|
||||
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-06-09 13:11
|
||||
*@Description:
|
||||
* TODO 小美玩套娃:
|
||||
* 小美最近喜欢上了玩套娃。
|
||||
* 具体的,小美有 n 个套娃,第 i 个套娃的大小为 ai,内部空间为 bi(bi≤ai)。
|
||||
* 对于两个套娃x,y, x能放入y中当且仅当ax≤by ,且放入后会占据 y 大小为 ax 的内部空间,即 y 的内部空间剩下 by-ax,
|
||||
* 每个套娃只能放在另外的一个套娃内,每个套娃内部也只能放一个套娃(当然内部放的这个套娃可以内部还有套娃)。
|
||||
* 显然套娃是套的越多越好,于是小美给每个套娃定义了一个价值 ci,
|
||||
* 如果套完之后套娃 i 还剩 k 的内部空间,小美需要付出ci*k 的花费,总花费为所有套娃的花费之和,现在小美想知道最小的花费为多少
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question4 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner sc = new Scanner(System.in);
|
||||
int n = sc.nextInt();
|
||||
int[] out = new int[n];
|
||||
int[] in = new int[n];
|
||||
int[] payment = new int[n];
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
out[i] = sc.nextInt();
|
||||
}
|
||||
for (int i = 0; i < n; i++) {
|
||||
in[i] = sc.nextInt();
|
||||
}
|
||||
for (int i = 0; i < n; i++) {
|
||||
payment[i] = sc.nextInt();
|
||||
}
|
||||
solve(out, in, payment);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
int[] out = {5, 4, 3};
|
||||
int[] in = {4, 2, 2};
|
||||
int[] payment = {3, 2, 1};
|
||||
solve(out, in, payment);
|
||||
}
|
||||
|
||||
//官方思路:可能是错的,贪心的思路:尽可能将花费大的先填满
|
||||
public static void solve(int[] out, int[] in, int[] payment) {
|
||||
//构造按大小排序的套娃
|
||||
List<int[]> taoS1 = new ArrayList<>();
|
||||
List<int[]> taoS2 = new ArrayList<>();
|
||||
for (int i = 0; i < out.length; i++) {
|
||||
taoS1.add(new int[]{out[i], in[i], payment[i], i});
|
||||
taoS2.add(new int[]{out[i], in[i], payment[i], i});
|
||||
}
|
||||
|
||||
Collections.sort(taoS1, ((o1, o2) -> o1[0] - o2[0]));//按空间有小到大排序
|
||||
Collections.sort(taoS2, ((o1, o2) -> o1[2] - o2[2]));//按花费排序
|
||||
|
||||
//按照空间大小遍历,找到最大的未使用的最大的套娃
|
||||
int n = out.length;
|
||||
int rightThreshold = n - 1;
|
||||
for (int i = n - 1; i >= 0; i--) {
|
||||
int left = 0;
|
||||
int right = rightThreshold;
|
||||
int mid = (left + right + 1) >> 1;//四舍五入
|
||||
while (left < right) {
|
||||
mid = (left + right + 1) >> 1;
|
||||
if (taoS2.get(i)[1] >= taoS1.get(mid)[0]) left = mid;//当前花费最大的能被放入;寻找更大能放入的
|
||||
else left = mid + 1;//为了快速收敛?
|
||||
}
|
||||
|
||||
if (taoS1.get(mid)[3] == taoS2.get(i)[3]) right--;//使用的自己,那不行
|
||||
if (taoS2.get(i)[1] < taoS1.get(right)[0]) break;//当前位置不可能被其他的点放入了
|
||||
|
||||
taoS2.get(i)[1] -= taoS1.get(right)[0];
|
||||
rightThreshold = right - 1;
|
||||
}
|
||||
|
||||
int result = 0;
|
||||
for (int[] total : taoS2) {
|
||||
result += total[1] * total[2];
|
||||
}
|
||||
System.out.println(result);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -1,38 +1,35 @@
|
|||
package com.markilue.leecode.test;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Scanner;
|
||||
|
||||
public class Fibonaqi {

    /**
     * Exercises the O(n) recursive Fibonacci implementation.
     */
//    @Test
//    public static void testFibonaqi(){
//
//    }
    public static void main(String[] args) {
        System.out.println(fibonacci(1, 1, 10));
    }

    /**
     * Returns the n-th Fibonacci number (1-based: 1, 1, 2, 3, 5, ...) by carrying the two
     * most recent terms through the recursion, so each call makes one recursive call.
     *
     * The result is a Fibonacci number only when the initial call passes
     * {@code first = 1} and {@code second = 1}; for {@code n < 3} the seed values are
     * ignored and 1 is returned.
     *
     * @param first  the earlier of the two most recent terms
     * @param second the later of the two most recent terms
     * @param n      1-based position of the requested term
     * @return 0 when {@code n <= 0}, otherwise the requested term
     */
    public static int fibonacci(int first, int second, int n) {
        if (n <= 0) {
            return 0;
        }
        if (n < 3) {
            return 1;
        } else if (n == 3) {
            return first + second;
        } else {
            // Slide the window one term forward and count down.
            return fibonacci(second, first + second, n - 1);
        }
    }

}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
这个文件夹存放了
|
||||
1)爬虫学习的一些案例,实际案例操作 https://github.com/Python3WebSpider
|
||||
2)实际爬取的一些网站等
|
||||
等
|
||||
爬虫学习中心:https://setup.scrape.center/
|
||||
|
|
@ -0,0 +1,244 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 16:08
|
||||
@Usage :
|
||||
@Desc :参考 https://github.com/Python3WebSpider/BeautifulSoupTest
|
||||
'''
|
||||
|
||||
# Sample HTML document read by the functions below that do not define their own
# local `html`. The markup ends without closing </body> and </html> tags.
html = """
|
||||
<html><head><title>The Dormouse's story</title></head>
|
||||
<body>
|
||||
<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
|
||||
<p class="story">Once upon a time there were three little sisters; and their names were
|
||||
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
|
||||
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
|
||||
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
|
||||
and they lived at the bottom of a well.</p>
|
||||
<p class="story">...</p>
|
||||
"""
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
|
||||
def baseUse():
    """Print basic tag, name and attribute lookups on the module-level ``html`` sample."""
    document = BeautifulSoup(html, 'lxml')
    title_tag = document.title
    first_paragraph = document.p

    print(title_tag)  # <title>The Dormouse's story</title>
    print(type(title_tag))  # <class 'bs4.element.Tag'>
    print(title_tag.string)  # The Dormouse's story
    print(document.head)  # <head><title>The Dormouse's story</title></head>
    print(first_paragraph)  # <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
    print(first_paragraph.name)  # node name: p
    print(first_paragraph.attrs)  # attributes: {'class': ['title'], 'name': 'dromouse'}
    print(first_paragraph.attrs['name'])  # attribute value: dromouse
    print(first_paragraph['name'])  # attribute value: dromouse
    print(document.body.p['name'])  # nested selection: dromouse

    print("==========================")
|
||||
|
||||
|
||||
def child():
    """Print the direct children, then all descendants, of the first <p> of a local sample."""
    html = """
<html>
<head>
<title>The Dormouse's story</title>
</head>
<body>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">
<span>Elsie</span>
</a>
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
and they lived at the bottom of a well.
</p>
<p class="story">...</p>
"""
    document = BeautifulSoup(html, 'lxml')

    # Direct children only.
    for index, node in enumerate(document.p.children):
        print(index, node)
    print("===============================")

    # Children, grandchildren and deeper.
    for index, node in enumerate(document.p.descendants):
        print(index, node)
    print("===============================")
|
||||
|
||||
|
||||
def parent():
    """Print the parent and the ancestor chain of the first <a> in the module-level ``html``."""
    document = BeautifulSoup(html, 'lxml')

    # Immediate parent.
    print(document.a.parent)
    print("===============================")

    # All ancestors; `.parents` is printed first by type, then materialised as a list.
    print(type(document.a.parents))
    ancestors = list(enumerate(document.a.parents))
    print(ancestors)
    print("=============================")
|
||||
|
||||
|
||||
def brother():
    """Print the next/previous sibling(s) of the first <a> of a local sample."""
    html = """
<html>
<body>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">
<span>Elsie</span>
</a>
Hello
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
and they lived at the bottom of a well.
</p>
"""
    # Sibling nodes.
    document = BeautifulSoup(html, 'lxml')
    print('Next Sibling', document.a.next_sibling)
    print('Prev Sibling', document.a.previous_sibling)

    following = list(enumerate(document.a.next_siblings))
    print('Next Siblings', following)
    preceding = list(enumerate(document.a.previous_siblings))
    print('Prev Siblings', preceding)
|
||||
|
||||
# 找到所有满足条件的
|
||||
def findAll():
    """Demonstrate ``find_all``: list every <ul>, then the <li> items inside each one."""
    html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
    document = BeautifulSoup(html, 'lxml')
    print(document.find_all(name='ul'))
    print(type(document.find_all(name='ul')[0]))

    # Nested query: the <li> items of each list.
    for unordered in document.find_all(name='ul'):
        print(unordered.find_all(name='li'))

    # Same again, additionally printing the text of every item.
    for unordered in document.find_all(name='ul'):
        print(unordered.find_all(name='li'))
        for entry in unordered.find_all(name='li'):
            print(entry.string)
|
||||
|
||||
|
||||
# 找属性满足匹配得到
|
||||
def attrs():
    """Demonstrate ``find_all`` filters by attribute dict, keyword attribute and text regex."""
    html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1" name="elements">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''

    document = BeautifulSoup(html, 'lxml')
    for criteria in ({'id': 'list-1'}, {'name': 'elements'}):
        print(document.find_all(attrs=criteria))

    # Common attributes can be passed as keywords instead of through `attrs`.
    document = BeautifulSoup(html, 'lxml')
    print(document.find_all(id='list-1'))
    print(document.find_all(class_='element'))

    import re
    # `string` matches against the text content of a node.
    foo_pattern = re.compile('Foo')
    print(document.find_all(string=foo_pattern))
|
||||
|
||||
|
||||
# 返回匹配到的第一个元素
|
||||
def find():
    """Demonstrate ``find``, which returns only the first matching element."""
    html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
    document = BeautifulSoup(html, 'lxml')

    print(document.find(name='ul'))
    print(type(document.find(name='ul')))

    by_class = document.find(class_='list')
    print(by_class)
|
||||
|
||||
# css选择器
|
||||
def cssSelect():
    """Demonstrate CSS selectors via ``select``: matching, nesting, attributes and text."""
    html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''

    document = BeautifulSoup(html, 'lxml')
    for selector in ('.panel .panel-heading', 'ul li', '#list-2 .element'):
        print(document.select(selector))
    print(type(document.select('ul')[0]))

    # Nested selection: run a second selector on each matched <ul>.
    document = BeautifulSoup(html, 'lxml')
    for unordered in document.select('ul'):
        print(unordered.select('li'))

    # Attribute access: subscript and `.attrs` both read the same value.
    document = BeautifulSoup(html, 'lxml')
    for unordered in document.select('ul'):
        print(unordered['id'])
        print(unordered.attrs['id'])

    # Text access.
    document = BeautifulSoup(html, 'lxml')
    for entry in document.select('li'):
        print('Get Text:', entry.get_text())
        print('String:', entry.string)
|
||||
|
||||
|
||||
|
||||
# Script entry point: when executed directly, only the CSS-selector demo is run.
if __name__ == '__main__':
|
||||
cssSelect()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 16:07
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 16:54
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,329 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 16:54
|
||||
@Usage :
|
||||
@Desc :Pyquery学习 参考: https://github.com/Python3WebSpider/PyQueryTest
|
||||
'''
|
||||
from pyquery import PyQuery as pq
|
||||
|
||||
|
||||
# 字符串初始化
|
||||
def stringBase():
    """Build a PyQuery document from an HTML string and print every <li>."""
    markup = '''
<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''

    document = pq(markup)
    list_items = document('li')
    print(list_items)
|
||||
|
||||
|
||||
# URL初始化
|
||||
def URLBase():
    """Build a PyQuery document from a URL and print its <title>."""
    page = pq(url='https://cuiqingcai.com')
    print(page('title'))

    # The code above is equivalent to:
    # doc = pq(requests.get('https://cuiqingcai.com').text)
    # print(doc('title'))
|
||||
|
||||
|
||||
# 文件初始化
|
||||
def fileBase():
    """Build a PyQuery document from the file ``demo.html`` and print every <li>."""
    document = pq(filename='demo.html')
    list_items = document('li')
    print(list_items)
|
||||
|
||||
# 基本的css选择器
|
||||
def cssSelect():
    """Select the list items with a descendant CSS selector and print them and their text."""
    markup = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
    document = pq(markup)
    selector = '#container .list li'

    print(document(selector))
    print(type(document(selector)))

    # Iterate the matches one by one and print each item's text.
    for entry in document(selector).items():
        print(entry.text())
|
||||
|
||||
# 寻找子节点
|
||||
def child():
    """Demonstrate ``find`` and ``children`` (unfiltered and filtered) on the ``.list`` node."""
    markup = '''
<div>
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
    document = pq(markup)
    list_node = document('.list')
    print(type(list_node))
    print(list_node)

    # find(): search below the node with a selector.
    found = list_node.find('li')
    print(type(found))
    print(found)

    # children(): no selector.
    direct = list_node.children()
    print(type(direct))
    print(direct)

    # children() restricted by a selector.
    active_children = list_node.children('.active')
    print(active_children)
|
||||
|
||||
|
||||
def parent():
    """Demonstrate ``parent``, ``parents`` (with and without a filter) and ``siblings``."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
    # parent() of the .list node.
    document = pq(markup)
    list_node = document('.list')
    direct_parent = list_node.parent()
    print(type(direct_parent))
    print(direct_parent)

    # parents() of a freshly parsed copy.
    document = pq(markup)
    list_node = document('.list')
    ancestors = list_node.parents()
    print(type(ancestors))
    print(ancestors)

    # parents() restricted by a selector.
    wrapper = list_node.parents('.wrap')
    print(wrapper)

    # siblings() of the item that is both .item-0 and .active.
    document = pq(markup)
    active_item = document('.list .item-0.active')
    print(active_item.siblings())
|
||||
|
||||
def brother():
    """Demonstrate filtered ``siblings``, printing a single match, and iterating many matches."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
    # siblings() restricted by a selector.
    document = pq(markup)
    active_item = document('.list .item-0.active')
    print(active_item.siblings('.active'))

    # A selection printed directly and through str().
    document = pq(markup)
    active_item = document('.item-0.active')
    print(active_item)
    print(str(active_item))

    # A selection may hold several nodes; items() yields them one at a time.
    document = pq(markup)
    item_iter = document('li').items()
    print(type(item_iter))
    for entry in item_iter:
        print(entry, type(entry))
|
||||
|
||||
def attrs():
    """Demonstrate reading the ``href`` attribute from single and multiple <a> matches."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
    document = pq(markup)
    link = document('.item-0.active a')
    print(link, type(link))
    print(link.attr('href'))

    # Same calls on a selection of every <a>; both attr() spellings are shown.
    all_links = document('a')
    print(all_links, type(all_links))
    print(all_links.attr('href'))
    print(all_links.attr.href)

    document = pq(markup)
    all_links = document('a')
    for anchor in all_links.items():
        # Attribute and text of each individual link.
        print(anchor.attr('href'), anchor.text())
|
||||
|
||||
def getHTML():
    """Print ``html()`` and ``text()`` of a multi-node <li> selection."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
    document = pq(markup)
    list_items = document('li')

    print(list_items.html())  # HTML of the first node: <a href="link2.html">second item</a>
    combined_text = list_items.text()
    print(combined_text)  # text of all matched nodes: second item third item fourth item fifth item
    print(type(list_items.text()))
|
||||
|
||||
# 增加或者删除节点的class
|
||||
def operateNode():
    """Remove and re-add the ``active`` class on a node, printing the node after each step."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
    document = pq(markup)
    target = document('.item-0.active')
    print(target)

    target.removeClass('active')
    print(target)

    target.addClass('active')
    print(target)

    '''
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>

<li class="item-0"><a href="link3.html"><span class="bold">third item</span></a></li>

<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
'''
|
||||
|
||||
|
||||
|
||||
def operateNodeInformation():
    """Set an attribute, the text and the inner HTML of a node, printing it after each change."""
    from pyquery import PyQuery as pq

    markup = '''
<ul class="list">
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
</ul>
'''
    document = pq(markup)
    target = document('.item-0.active')
    print(target)

    target.attr('name', 'link')
    print(target)

    target.text('changed item')
    print(target)

    target.html('<span>changed item</span>')
    print(target)
    '''
<li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active" name="link">changed item</li>
<li class="item-0 active" name="link"><span>changed item</span></li>
'''
|
||||
|
||||
|
||||
def removeInformation():
    """Print the wrapper's text before and after removing its <p> child."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
Hello, World
<p>This is a paragraph.</p>
</div>
'''
    document = pq(markup)
    wrapper = document('.wrap')
    print(wrapper.text())
    '''
Hello, World
This is a paragraph.
'''
    paragraph = wrapper.find('p')
    paragraph.remove()
    print(wrapper.text())
    '''
Hello, World
'''
|
||||
|
||||
# 伪类选择器
|
||||
def fakeCSSSelect():
    """Print the <li> matches of several pseudo-class selectors, one selector at a time."""
    from pyquery import PyQuery as pq

    markup = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
    document = pq(markup)
    pseudo_selectors = (
        'li:first-child',
        'li:last-child',
        'li:nth-child(2)',
        'li:gt(2)',
        'li:nth-child(2n)',
        'li:contains(second)',
    )
    for selector in pseudo_selectors:
        matched = document(selector)
        print(matched)
|
||||
|
||||
|
||||
|
||||
# Script entry point: when executed directly, only the pseudo-class selector demo is run.
if __name__ == '__main__':
|
||||
fakeCSSSelect()
|
||||
|
|
@ -0,0 +1,195 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 15:15
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
from lxml import etree
|
||||
|
||||
'''
|
||||
XPath基本规则:
|
||||
|
||||
1) nodename:选择此节点的所有子节点
|
||||
2) /:从当前节点选取直接子节点
|
||||
3) //:从当前节点选取子孙节点
|
||||
4) .:选取当前节点
|
||||
5) ..:选取当前节点的父节点
|
||||
6) @:选取属性
|
||||
|
||||
举例:
|
||||
//title[@lang='eng']代表选择所有名称为title,同时属性lang的值为eng的节点
|
||||
'''
|
||||
|
||||
|
||||
def htmlByString():
    """Parse an HTML string with ``etree.HTML`` and print the serialised tree."""
    text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
    tree = etree.HTML(text)
    serialised = etree.tostring(tree)
    print(serialised.decode('utf-8'))
|
||||
|
||||
|
||||
def htmlByFile():
    """Parse ``./test.html`` with an ``HTMLParser`` and print the serialised tree."""
    tree = etree.parse('./test.html', etree.HTMLParser())
    print(etree.tostring(tree).decode('utf-8'))
|
||||
|
||||
|
||||
def allNode():
    """Print all nodes, then all <li> nodes, of ``./test.html`` (list and first element)."""
    tree = etree.parse('./test.html', etree.HTMLParser())

    # '//*' matches every node in the document; '//li' matches every <li>.
    for expression in ('//*', '//li'):
        matches = tree.xpath(expression)
        print(matches)
        print(matches[0])
|
||||
|
||||
|
||||
# 子节点匹配
|
||||
def childNode():
    """Print <a> nodes selected as direct children of <li> and as descendants of <ul>."""
    tree = etree.parse('./test.html', etree.HTMLParser())

    # '/' selects direct children: every <a> that is a child of an <li>.
    direct = tree.xpath('//li/a')
    print(direct)
    print(direct[0])

    # '//' selects descendants at any depth: every <a> anywhere below a <ul>.
    nested = tree.xpath('//ul//a')
    print(nested)
    print(nested[0])
|
||||
|
||||
|
||||
# 父节点匹配
|
||||
def fatherNode():
    """Print the ``class`` of the parent of the <a> whose href is ``link4.html``, two ways."""
    tree = etree.parse('./test.html', etree.HTMLParser())

    # '..' steps to the parent node.
    via_dots = tree.xpath('//a[@href="link4.html"]/../@class')
    print(via_dots)

    # The parent:: axis does the same.
    via_axis = tree.xpath('//a[@href="link4.html"]/parent::*/@class')
    print(via_axis)
|
||||
|
||||
|
||||
# 文本获取
|
||||
def textGet():
    """Print text nodes below <li class="item-0">: via the child <a>, then via any descendant."""
    tree = etree.parse('./test.html', etree.HTMLParser())

    # Text of the <a> children of the matching <li> nodes.
    link_text = tree.xpath('//li[@class="item-0"]/a/text()')
    print(link_text)  # ['first item', 'fifth item']

    # Text of every descendant of the matching <li> nodes.
    all_text = tree.xpath('//li[@class="item-0"]//text()')
    print(all_text)  # ['first item', 'fifth item', '\r\n ']
|
||||
|
||||
|
||||
# 属性获取
|
||||
def fieldGet():
    """Print the ``href`` attribute of every <a> that is a direct child of an <li>."""
    tree = etree.parse('./test.html', etree.HTMLParser())

    # '@href' selects the attribute itself; the <li> nodes are not filtered by class here.
    hrefs = tree.xpath('//li/a/@href')
    print(hrefs)  # ['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
|
||||
|
||||
|
||||
# 属性多值匹配
|
||||
def fieldsGet():
    """Show that a multi-valued ``class`` needs ``contains()`` rather than an exact match."""
    text = '''
<li class="li li-first"><a href="link.html">first item</a></li>
'''
    tree = etree.HTML(text)

    exact = tree.xpath('//li[@class="li"]/a/text()')
    print(exact)  # [] -- the exact comparison finds nothing

    partial = tree.xpath('//li[contains(@class, "li")]/a/text()')
    print(partial)  # ['first item'] -- contains() matches
|
||||
|
||||
|
||||
# 多属性匹配
|
||||
def fieldssGet():
    """Match a node on two attributes at once by joining the conditions with ``and``."""
    text = '''
<li class="li li-first" name="item"><a href="link.html">first item</a></li>
'''
    tree = etree.HTML(text)

    # Several attribute conditions are combined with `and` inside one predicate.
    expression = '//li[contains(@class, "li") and @name="item"]/a/text()'
    print(tree.xpath(expression))
|
||||
|
||||
|
||||
# 按序选择
|
||||
def orderGet():
    """Select <li> nodes by position (first, last, position range, offset from last)."""
    text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
    tree = etree.HTML(text)
    positional_queries = (
        '//li[1]/a/text()',  # ['first item']
        '//li[last()]/a/text()',  # ['fifth item']
        '//li[position()<3]/a/text()',  # ['first item', 'second item']
        '//li[last()-2]/a/text()',  # ['third item']
    )
    for expression in positional_queries:
        print(tree.xpath(expression))
|
||||
|
||||
|
||||
def nodeSelect():
    """Print the result of one query per XPath axis, all starting from the first <li>."""
    text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html"><span>first item</span></a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
    tree = etree.HTML(text)
    axis_queries = (
        '//li[1]/ancestor::*',  # all ancestors
        '//li[1]/ancestor::div',  # ancestors that are <div>
        '//li[1]/attribute::*',  # all attributes
        '//li[1]/child::a[@href="link1.html"]',  # child <a> with the given href
        '//li[1]/descendant::span',  # descendant <span>
        '//li[1]/following::*[2]',  # second node after the current one
        '//li[1]/following-sibling::*',  # later siblings on the same level
    )
    for expression in axis_queries:
        print(tree.xpath(expression))
|
||||
|
||||
# Script entry point: when executed directly, only the XPath axis demo is run.
if __name__ == '__main__':
|
||||
nodeSelect()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 15:15
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
<div>
|
||||
<ul>
|
||||
<li class="item-0"><a href="link1.html">first item</a></li>
|
||||
<li class="item-1"><a href="link2.html">second item</a></li>
|
||||
<li class="item-inactive"><a href="link3.html">third item</a></li>
|
||||
<li class="item-1"><a href="link4.html">fourth item</a></li>
|
||||
<li class="item-0"><a href="link5.html">fifth item</a>
|
||||
</ul>
|
||||
</div>
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/11/8 15:12
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 14:03
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 14:39
|
||||
@Usage :
|
||||
@Desc : 保存成Json
|
||||
'''
|
||||
|
||||
import json
|
||||
|
||||
# Raw JSON text. Named json_text so the built-in `str` type is not shadowed.
json_text = '''
[{
    "name": "Bob",
    "gender": "male",
    "birthday": "1992-10-18"
}, {
    "name": "Selina",
    "gender": "female",
    "birthday": "1995-10-18"
}]
'''
print(type(json_text))
# json.loads turns the JSON text into Python objects (here: a list of dicts).
data = json.loads(json_text)
print(data)
print(type(data))

data = [{
    'name': 'Bob',
    'gender': 'male',
    'birthday': '1992-10-18'
}]
# Compact output on a single line.
with open('data.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(data))

# indent=2 pretty-prints the output with two-space indentation.
with open('data.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(data, indent=2))

data = [{
    'name': '张三',
    'gender': 'male',
    'birthday': '1992-10-18'
}]

# ensure_ascii=False writes non-ASCII characters (e.g. Chinese) verbatim
# instead of \uXXXX escapes.
with open('data.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 15:03
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
import pymysql
|
||||
|
||||
data = {
    'id': '20120001',
    'name': 'Bob',
    'age': 20
}
# Build the INSERT statement dynamically from the dict: column names come from
# the keys, and one %s placeholder is emitted per value so the values
# themselves are passed to the driver as parameters.
table = 'students'
keys = ', '.join(data.keys())
values = ', '.join(['%s'] * len(data))
db = pymysql.connect(host='localhost', user='root',
                     password=None, port=3306, db='spiders')
cursor = db.cursor()
sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(
    table=table, keys=keys, values=values)
try:
    if cursor.execute(sql, tuple(data.values())):
        print('Successful')
        db.commit()
except Exception as e:
    print('Failed', e)
    db.rollback()
finally:
    # Always release the connection, even if commit/rollback itself fails.
    db.close()
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 14:08
|
||||
@Usage :
|
||||
@Desc :保存为Text
|
||||
'''
|
||||
|
||||
import requests
|
||||
from pyquery import PyQuery as pq
|
||||
import re
|
||||
|
||||
url = 'https://ssr1.scrape.center/'
html = requests.get(url).text
doc = pq(html)
items = doc('.el-card').items()

# Raw string: '\d' in a normal string literal is an invalid escape sequence.
date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')

# The context manager closes movies.txt even if parsing one card raises.
with open('movies.txt', 'w', encoding='utf-8') as file:
    for item in items:
        # Title
        name = item.find('a > h2').text()
        file.write(f'名称: {name}\n')
        # Categories (loop variable renamed so it no longer shadows `item`)
        categories = [span.text() for span in item.find('.categories button span').items()]
        file.write(f'类别: {categories}\n')
        # Release date: keep only the YYYY-MM-DD part, or None when absent
        published_at = item.find('.info:contains(上映)').text()
        date_match = date_pattern.search(published_at) if published_at else None
        published_at = date_match.group(0) if date_match else None
        file.write(f'上映时间: {published_at}\n')
        # Score
        score = item.find('p.score').text()
        file.write(f'评分: {score}\n')
        file.write(f'{"=" * 50}\n')
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:01
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
import requests
|
||||
import logging
|
||||
import json
|
||||
from os import makedirs
|
||||
from os.path import exists
|
||||
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s: %(message)s')
|
||||
|
||||
INDEX_URL = 'https://spa1.scrape.center/api/movie/?limit={limit}&offset={offset}'
|
||||
DETAIL_URL = 'https://spa1.scrape.center/api/movie/{id}'
|
||||
LIMIT = 10
|
||||
TOTAL_PAGE = 10
|
||||
RESULTS_DIR = 'results'
|
||||
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
|
||||
|
||||
|
||||
def scrape_api(url, timeout=10):
    """GET ``url`` and return the decoded JSON body.

    :param url: address to request.
    :param timeout: seconds to wait for the server before giving up; without
        a timeout ``requests.get`` can block indefinitely.
    :return: the parsed JSON on HTTP 200, otherwise ``None`` (a non-200
        status or a ``requests.RequestException`` is logged, not raised).
    """
    logging.info('scraping %s...', url)
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        logging.error('get invalid status code %s while scraping %s',
                      response.status_code, url)
    except requests.RequestException:
        logging.error('error occurred while scraping %s', url, exc_info=True)
    return None
|
||||
|
||||
|
||||
def scrape_index(page):
    """Fetch index page number ``page`` (1-based) through ``scrape_api``."""
    offset = LIMIT * (page - 1)
    return scrape_api(INDEX_URL.format(limit=LIMIT, offset=offset))
|
||||
|
||||
|
||||
def scrape_detail(id):
    """Fetch the detail record for the given ``id`` through ``scrape_api``."""
    return scrape_api(DETAIL_URL.format(id=id))
|
||||
|
||||
|
||||
def save_data(data):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``RESULTS_DIR/<name>.json``.

    The file name is taken from ``data['name']``.
    """
    name = data.get('name')
    data_path = f'{RESULTS_DIR}/{name}.json'
    # Context manager guarantees the handle is flushed and closed.
    with open(data_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def main():
    """Crawl every index page, then fetch and save each listed detail record.

    ``scrape_api`` returns ``None`` on failure, so failed index pages and
    failed detail requests are skipped instead of crashing the run.
    """
    for page in range(1, TOTAL_PAGE + 1):
        index_data = scrape_index(page)
        if not index_data:
            continue
        for item in index_data.get('results') or []:
            movie_id = item.get('id')
            detail_data = scrape_detail(movie_id)
            logging.info('detail data %s', detail_data)
            if detail_data:
                save_data(detail_data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 15:58
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:19
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:57
|
||||
@Usage : aiohttp库的使用
|
||||
@Desc :
|
||||
@参考:https://github.dev/Python3WebSpider/AsyncTest demo12
|
||||
'''
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
|
||||
|
||||
async def fetch(session, url):
    """GET ``url`` on ``session`` and return ``(body_text, status)``."""
    async with session.get(url) as response:
        body = await response.text()
        return body, response.status
|
||||
|
||||
|
||||
async def main():
    """Fetch one page and print the first 100 characters plus the status."""
    async with aiohttp.ClientSession() as session:
        fetched = await fetch(session, 'https://cuiqingcai.com')
        html, status = fetched
        print(f'html: {html[:100]}...')
        print(f'status: {status}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 17:02
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 19:14
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import logging
|
||||
from motor.motor_asyncio import AsyncIOMotorClient
|
||||
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s: %(message)s')
|
||||
|
||||
INDEX_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
|
||||
DETAIL_URL = 'https://spa5.scrape.center/api/book/{id}'
|
||||
PAGE_SIZE = 18
|
||||
PAGE_NUMBER = 1
|
||||
CONCURRENCY = 5
|
||||
|
||||
session = None
|
||||
|
||||
MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
|
||||
MONGO_DB_NAME = 'books'
|
||||
MONGO_COLLECTION_NAME = 'books'
|
||||
|
||||
client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
|
||||
db = client[MONGO_DB_NAME]
|
||||
collection = db[MONGO_COLLECTION_NAME]
|
||||
|
||||
semaphore = asyncio.Semaphore(CONCURRENCY)
|
||||
|
||||
|
||||
async def scrape_api(url):
    """Fetch ``url`` with the shared session and return its JSON body.

    The module-level semaphore is held for the duration of the request.
    Returns ``None`` when an ``aiohttp.ClientError`` is raised (it is logged).
    """
    async with semaphore:
        logging.info('scraping %s', url)
        try:
            async with session.get(url) as response:
                payload = await response.json()
        except aiohttp.ClientError:
            logging.error('error occurred while scraping %s', url, exc_info=True)
            return None
        return payload
|
||||
|
||||
|
||||
async def scrape_index(page):
    """Fetch index page number ``page`` (1-based) through ``scrape_api``."""
    offset = PAGE_SIZE * (page - 1)
    return await scrape_api(INDEX_URL.format(offset=offset))
|
||||
|
||||
|
||||
async def scrape_detail(id):
    """Fetch the detail record for ``id`` and hand it to ``save_data``."""
    detail = await scrape_api(DETAIL_URL.format(id=id))
    await save_data(detail)
|
||||
|
||||
|
||||
async def save_data(data):
    """Upsert ``data`` into the collection, matched on its ``id`` field.

    Falsy ``data`` is only logged; in that case ``None`` is returned.
    """
    logging.info('saving data %s', data)
    if not data:
        return None
    match_by_id = {'id': data.get('id')}
    return await collection.update_one(match_by_id, {'$set': data}, upsert=True)
|
||||
|
||||
|
||||
async def main():
    """Scrape all index pages concurrently, then scrape and store every detail.

    The shared ``session`` global is created here and always closed, even
    when a scraping step raises.
    """
    global session
    session = aiohttp.ClientSession()
    try:
        # index tasks
        scrape_index_tasks = [asyncio.ensure_future(scrape_index(page))
                              for page in range(1, PAGE_NUMBER + 1)]
        results = await asyncio.gather(*scrape_index_tasks)
        # detail tasks
        print('results', results)
        ids = []
        for index_data in results:
            if not index_data:
                continue
            # 'results' may be missing or null in an unexpected payload.
            for item in index_data.get('results') or []:
                ids.append(item.get('id'))
        scrape_detail_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
        # asyncio.wait raises ValueError on an empty collection, so skip it
        # when no ids were collected.
        if scrape_detail_tasks:
            await asyncio.wait(scrape_detail_tasks)
    finally:
        await session.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:57
|
||||
@Usage : aiohttp库的使用
|
||||
@Desc :
|
||||
@参考:https://github.dev/Python3WebSpider/AsyncTest demo12
|
||||
'''
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
|
||||
|
||||
async def fetch(session, url):
    """Request ``url`` via ``session``; return a ``(text, status)`` tuple."""
    async with session.get(url) as response:
        page_text = await response.text()
        return page_text, response.status
|
||||
|
||||
|
||||
async def main():
    """Basic usage: fetch a page, print a 100-character preview and the status."""
    async with aiohttp.ClientSession() as session:
        outcome = await fetch(session, 'https://cuiqingcai.com')
        html, status = outcome
        print(f'html: {html[:100]}...')
        print(f'status: {status}')
|
||||
|
||||
|
||||
# 给url参数
|
||||
async def main1():
    """Send a GET request with URL query parameters and print the response body."""
    query = {'name': 'germey', 'age': 25}
    async with aiohttp.ClientSession() as session, \
            session.get('https://httpbin.org/get', params=query) as response:
        print(await response.text())
|
||||
'''
|
||||
session还支持其他请求类型:
|
||||
session.post('https://httpbin.org/post', data=b'data')
|
||||
session.put('https://httpbin.org/put', data=b'data')
|
||||
session.delete('https://httpbin.org/delete')
|
||||
session.head('https://httpbin.org/get')
|
||||
session.options('https://httpbin.org/get')
|
||||
session.patch('https://httpbin.org/patch', data=b'data')
|
||||
'''
|
||||
|
||||
# 返回的response对象
|
||||
async def main2():
    """POST form data and print the fields of the response object."""
    form = {'name': 'germey', 'age': 25}
    # Some response members need ``await`` and some do not: anything that
    # returns a coroutine (e.g. an ``async`` method) must be awaited. See the
    # aiohttp client reference for which is which:
    # https://docs.aiohttp.org/en/stable/client_reference.html
    async with aiohttp.ClientSession() as session, \
            session.post('https://httpbin.org/post', data=form) as response:
        print('status:', response.status)
        print('headers:', response.headers)
        print('body:', await response.text())
        print('bytes:', await response.read())
        print('json:', await response.json())
|
||||
|
||||
# 超时设置
|
||||
async def main3():
    """Issue a GET on a session configured with a 0.1 second total timeout."""
    short_timeout = aiohttp.ClientTimeout(total=0.1)
    async with aiohttp.ClientSession(timeout=short_timeout) as session, \
            session.get('https://httpbin.org/get') as response:
        print('status:', response.status)
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main2())
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:57
|
||||
@Usage : 并发限制 防止一次太多爬崩网站 semaphore
|
||||
@Desc :
|
||||
@参考:https://github.dev/Python3WebSpider/AsyncTest
|
||||
'''
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
|
||||
CONCURRENCY = 5
|
||||
URL = 'https://www.baidu.com/'
|
||||
|
||||
semaphore = asyncio.Semaphore(CONCURRENCY)
|
||||
session = None
|
||||
|
||||
|
||||
async def scrape_api():
    """Fetch ``URL`` while holding the module-level semaphore; return the body text."""
    async with semaphore:
        print('scraping', URL)
        async with session.get(URL) as response:
            # await asyncio.sleep(1)
            body = await response.text()
            return body
|
||||
|
||||
|
||||
async def main():
    """Launch 10000 requests, throttled by the semaphore, and wait for them all.

    The shared ``session`` global is created here and closed in ``finally``
    so it is released even when one of the requests raises.
    """
    global session

    session = aiohttp.ClientSession()
    try:
        scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
        # gather already waits for every task; a further asyncio.wait on the
        # same, finished tasks was redundant and has been dropped.
        await asyncio.gather(*scrape_index_tasks)
    finally:
        await session.close()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# asyncio.run(main())
|
||||
asyncio.get_event_loop().run_until_complete(main())
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:34
|
||||
@Usage : 多任务协程
|
||||
@Desc :
|
||||
@参考: https://github.dev/Python3WebSpider/AsyncTest
|
||||
'''
|
||||
|
||||
import asyncio
|
||||
import requests
|
||||
|
||||
async def request():
    """Coroutine that GETs the Baidu home page and returns the response object."""
    return requests.get('https://www.baidu.com')
|
||||
|
||||
tasks = [asyncio.ensure_future(request()) for _ in range(5)]
|
||||
print('Tasks:', tasks)
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
# 五个任务被顺序执行
|
||||
loop.run_until_complete(asyncio.wait(tasks))
|
||||
|
||||
for task in tasks:
|
||||
print('Task Result:', task.result())
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:34
|
||||
@Usage : 多任务协程展示协程的优势
|
||||
@Desc :
|
||||
@参考: https://github.dev/Python3WebSpider/AsyncTest demo8_1和demo9_1 demo10
|
||||
|
||||
'''
|
||||
|
||||
|
||||
import asyncio
|
||||
import requests
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
# 单个执行每个都至少要5秒
|
||||
async def request():
    """Request a URL that takes about five seconds to answer and log progress."""
    target = 'https://httpbin.org/delay/5'
    print('Waiting for', target)
    # With or without ``await`` this call cannot be truly asynchronous;
    # aiohttp is needed for that.
    reply = requests.get(target)
    print('Get response from', target, 'response', reply)
|
||||
|
||||
tasks = [asyncio.ensure_future(request()) for _ in range(10)]
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(asyncio.wait(tasks))
|
||||
|
||||
end = time.time()
|
||||
print('Cost time:', end - start)
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:34
|
||||
@Usage : 多任务协程展示协程的优势
|
||||
@Desc :
|
||||
@参考: https://github.dev/Python3WebSpider/AsyncTest demo11
|
||||
|
||||
'''
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
async def get(url):
    """GET ``url`` in a throwaway session, read the body, return the response.

    The session is managed by ``async with`` so it is closed even when the
    request or the body read raises.
    """
    async with aiohttp.ClientSession() as session:
        response = await session.get(url)
        # Read the body before the session goes away.
        await response.text()
        return response
|
||||
|
||||
|
||||
async def request():
    """Await ``get`` on a slow URL, printing a line before and after."""
    target = 'https://httpbin.org/delay/5'
    print('Waiting for', target)
    reply = await get(target)
    print('Get response from', target, 'response', reply)
|
||||
|
||||
|
||||
tasks = [asyncio.ensure_future(request()) for _ in range(100)]
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(asyncio.wait(tasks))
|
||||
|
||||
end = time.time()
|
||||
print('Cost time:', end - start)
|
||||
# Cost time: 7.670234203338623
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 17:02
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:20
|
||||
@Usage : asyncio库 可以使用async和await关键字
|
||||
@Desc :异步爬虫测试 定义协程
|
||||
@参考:https://github.dev/Python3WebSpider/AsyncTest
|
||||
'''
|
||||
import asyncio
|
||||
|
||||
|
||||
async def execute(x):
    """Print ``x`` with a ``Number:`` label and return it unchanged."""
    label = 'Number:'
    print(label, x)
    return x
|
||||
|
||||
# 创建一个协程对象 coroutine
|
||||
coroutine = execute(1)
|
||||
|
||||
print('Coroutine:', coroutine)
|
||||
print('After calling execute')
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
task = loop.create_task(coroutine)
|
||||
print('Task:', task)
|
||||
loop.run_until_complete(task)
|
||||
print('Task:', task)
|
||||
print('After calling loop')
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 16:20
|
||||
@Usage : asyncio库 可以使用async和await关键字
|
||||
@Desc :异步爬虫测试 定义协程 为某一个task绑定回调方法
|
||||
@参考:https://github.dev/Python3WebSpider/AsyncTest
|
||||
'''
|
||||
import asyncio
|
||||
import requests
|
||||
|
||||
|
||||
async def request():
    """Coroutine that fetches the Baidu home page and returns the response."""
    target = 'https://www.baidu.com'
    return requests.get(target)
|
||||
|
||||
|
||||
def callback(task):
    """Done-callback: print the result of the finished ``task``."""
    outcome = task.result()
    print('Status:', outcome)
|
||||
|
||||
|
||||
coroutine = request()
|
||||
task = asyncio.ensure_future(coroutine)
|
||||
# 绑定回调,来保证顺序
|
||||
task.add_done_callback(callback)
|
||||
print('Task:', task)
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(task)
|
||||
print('Task:', task)
|
||||
|
||||
# 直接通过task.result()也可以直接获取结果达到类似的效果
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(task)
|
||||
print('Task:', task)
|
||||
print('Task Result:', task.result())
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 19:15
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
import logging
|
||||
from os.path import exists
|
||||
from os import makedirs
|
||||
import json
|
||||
import asyncio
|
||||
from pyppeteer import launch
|
||||
from pyppeteer.errors import TimeoutError
|
||||
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s: %(message)s')
|
||||
|
||||
INDEX_URL = 'https://spa2.scrape.center/page/{page}'
|
||||
TIMEOUT = 10
|
||||
TOTAL_PAGE = 10
|
||||
RESULTS_DIR = 'results'
|
||||
WINDOW_WIDTH, WINDOW_HEIGHT = 1366, 768
|
||||
|
||||
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
|
||||
|
||||
browser, tab = None, None
|
||||
HEADLESS = True
|
||||
|
||||
|
||||
async def init():
    """Launch the browser and open one tab, storing both in module globals."""
    global browser, tab
    launch_args = ['--disable-infobars', f'--window-size={WINDOW_WIDTH},{WINDOW_HEIGHT}']
    browser = await launch(headless=HEADLESS, args=launch_args)
    tab = await browser.newPage()
    viewport = {'width': WINDOW_WIDTH, 'height': WINDOW_HEIGHT}
    await tab.setViewport(viewport)
|
||||
|
||||
|
||||
async def scrape_page(url, selector):
    """Navigate the shared tab to ``url`` and wait for ``selector`` to appear.

    The wait is bounded by ``TIMEOUT`` seconds; a ``TimeoutError`` is logged
    and swallowed.
    """
    logging.info('scraping %s', url)
    wait_options = {'timeout': TIMEOUT * 1000}
    try:
        await tab.goto(url)
        await tab.waitForSelector(selector, options=wait_options)
    except TimeoutError:
        logging.error('error occurred while scraping %s', url, exc_info=True)
|
||||
|
||||
|
||||
async def scrape_index(page):
    """Load index page ``page`` and wait until item names are present."""
    await scrape_page(INDEX_URL.format(page=page), '.item .name')
|
||||
|
||||
|
||||
async def parse_index():
    """Return the ``href`` of every ``.item .name`` node on the current page."""
    collect_hrefs = 'nodes => nodes.map(node => node.href)'
    return await tab.querySelectorAllEval('.item .name', collect_hrefs)
|
||||
|
||||
|
||||
async def scrape_detail(url):
    """Load a detail page and wait for its ``h2`` element."""
    title_selector = 'h2'
    await scrape_page(url, title_selector)
|
||||
|
||||
|
||||
async def parse_detail():
    """Extract url, name, categories, cover, score and drama from the open tab."""
    inner_text = 'node => node.innerText'
    page_url = tab.url
    title = await tab.querySelectorEval('h2', inner_text)
    tags = await tab.querySelectorAllEval('.categories button span',
                                          'nodes => nodes.map(node => node.innerText)')
    cover_src = await tab.querySelectorEval('.cover', 'node => node.src')
    rating = await tab.querySelectorEval('.score', inner_text)
    synopsis = await tab.querySelectorEval('.drama p', inner_text)
    return dict(
        url=page_url,
        name=title,
        categories=tags,
        cover=cover_src,
        score=rating,
        drama=synopsis,
    )
|
||||
|
||||
|
||||
async def save_data(data):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``RESULTS_DIR/<name>.json``.

    The file name is taken from ``data['name']``.
    """
    name = data.get('name')
    data_path = f'{RESULTS_DIR}/{name}.json'
    # Context manager guarantees the handle is flushed and closed.
    with open(data_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def main():
    """Walk every index page, visit each detail link, and save what is parsed.

    The browser is closed in ``finally`` regardless of how the crawl ends.
    """
    await init()
    try:
        for page_no in range(1, TOTAL_PAGE + 1):
            await scrape_index(page_no)
            links = await parse_index()
            for link in links:
                await scrape_detail(link)
                record = await parse_detail()
                logging.info('data %s', record)
                await save_data(record)
    finally:
        await browser.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.get_event_loop().run_until_complete(main())
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 15:58
|
||||
@Usage : 使用Selenium实战爬取 https://spa2.scrape.center/
|
||||
@Desc : 该网站爬取详情页时存在一个token,这个token的实现逻辑可能不确定,并且随事件发生变化,
|
||||
因此需要使用Selenium模拟浏览器操作跳过这段逻辑
|
||||
'''
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from os import makedirs
|
||||
from os.path import exists
|
||||
import logging
|
||||
from urllib.parse import urljoin
|
||||
import json
|
||||
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s: %(message)s')
|
||||
|
||||
INDEX_URL = 'https://spa2.scrape.center/page/{page}'
|
||||
Timeout = 10
|
||||
Total_page = 10
|
||||
RESULTS_DIR = 'result'
|
||||
|
||||
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
|
||||
|
||||
# 防止有一些网站设置反屏蔽手段
|
||||
options = webdriver.ChromeOptions()
|
||||
options.add_experimental_option('excludeSwitches', ['enable-automation'])
|
||||
options.add_experimental_option('useAutomationExtension', False)
|
||||
|
||||
# 显示设置超时时间
|
||||
browser = webdriver.Chrome(options=options)
|
||||
wait = WebDriverWait(browser, Timeout)
|
||||
|
||||
|
||||
# 爬取网页
|
||||
def scrape_page(url, condition, locator):
    """Open ``url`` and block until ``condition(locator)`` holds.

    :param condition: an expected-condition factory such as
        ``EC.visibility_of_element_located``.
    :param locator: the ``(By, value)`` pair passed to ``condition``.

    A ``TimeoutException`` is logged and swallowed.
    """
    logging.info('scraping %s', url)
    try:
        browser.get(url)
        # Explicit wait on the module-level WebDriverWait.
        ready = condition(locator)
        wait.until(ready)
    except TimeoutException:
        logging.error('error occurred while scraping %s', url, exc_info=True)
|
||||
|
||||
|
||||
def scrape_index(page):
    """Load index page ``page`` and wait until every ``#index .item`` is visible."""
    item_locator = (By.CSS_SELECTOR, '#index .item')
    scrape_page(INDEX_URL.format(page=page),
                EC.visibility_of_all_elements_located,
                locator=item_locator)
|
||||
|
||||
|
||||
def parse_index():
    """Yield the absolute detail URL of each title link on the current index page."""
    for anchor in browser.find_elements(By.CSS_SELECTOR, '#index .item .name'):
        yield urljoin(INDEX_URL, anchor.get_attribute("href"))
|
||||
|
||||
|
||||
def scrape_detail(url):
    """Load a detail page and wait for its ``h2`` element to become visible."""
    title_locator = (By.TAG_NAME, 'h2')
    return scrape_page(url, EC.visibility_of_element_located, title_locator)
|
||||
|
||||
|
||||
def parse_detail():
    """Extract url, name, category list, cover, score and drama from the open page."""
    page_url = browser.current_url
    title = browser.find_element(By.TAG_NAME, 'h2').text
    tags = []
    for span in browser.find_elements(By.CSS_SELECTOR, '.categories button span'):
        tags.append(span.text)
    cover_src = browser.find_element(By.CLASS_NAME, 'cover').get_attribute("src")
    rating = browser.find_element(By.CLASS_NAME, 'score').text
    synopsis = browser.find_element(By.CSS_SELECTOR, '.drama p').text
    return dict(
        url=page_url,
        name=title,
        category=tags,
        cover=cover_src,
        score=rating,
        drama=synopsis,
    )
|
||||
|
||||
|
||||
def save_data(data):
    """Write ``data`` as pretty-printed UTF-8 JSON to ``RESULTS_DIR/<name>.json``.

    The file name is taken from ``data['name']``.
    """
    name = data.get('name')
    data_path = f'{RESULTS_DIR}/{name}.json'
    # Context manager guarantees the handle is flushed and closed.
    with open(data_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=2)
|
||||
|
||||
def main():
    """Crawl every index page, open each detail page, and save the parsed data."""
    try:
        for page in range(1, Total_page + 1):
            scrape_index(page)
            # Once the page has loaded, collect the detail URLs. They are
            # materialised into a list because navigating away would
            # invalidate the elements the generator reads from.
            detail_urls = list(parse_index())
            # logging.info('detail data %s', list(detail_urls))
            # Visit every detail URL and extract its information.
            for detail_url in detail_urls:
                scrape_detail(detail_url)
                detail_info = parse_detail()
                logging.info('detail info %s', detail_info)
                save_data(detail_info)
    finally:
        # quit() ends the whole WebDriver session (all windows and the driver
        # process); close() would only close the current window.
        browser.quit()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 19:46
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 19:32
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 19:32
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from pyquery import PyQuery as pq
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
import re
|
||||
|
||||
|
||||
# 解析名字,排序获得正确的顺序
|
||||
def parse_name(name_html):
    """Rebuild a scrambled name from its ``.char`` nodes.

    Each ``.char`` node carries one piece of text and a style attribute
    containing a pixel offset. The pieces are sorted by that offset
    (ascending) and concatenated.

    :param name_html: callable selector object (PyQuery-like) for one name node.
    :return: the reassembled name string.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    offset_pattern = re.compile(r'(\d+)px')
    items = []
    for char in name_html('.char').items():
        items.append({
            'text': char.text().strip(),
            'left': int(offset_pattern.search(char.attr('style')).group(1))
        })
    items = sorted(items, key=lambda x: x['left'], reverse=False)
    return ''.join([item.get('text') for item in items])
|
||||
|
||||
|
||||
# 判断如果是完整的就不进行下述操作
|
||||
def parse_name_whole(name_html):
    """Return the name for one name node, handling both page layouts.

    When the node contains a ``.whole`` element the text is already in the
    right order and is returned as is; otherwise the scrambled ``.char``
    pieces are reassembled by ``parse_name``.
    """
    if name_html('.whole'):
        return name_html.text()
    # Scrambled layout: reuse the shared reordering logic instead of
    # duplicating it here.
    return parse_name(name_html)
|
||||
|
||||
|
||||
browser = webdriver.Chrome()
|
||||
browser.get('https://antispider3.scrape.center/')
|
||||
WebDriverWait(browser, 10) \
|
||||
.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.item')))
|
||||
html = browser.page_source
|
||||
doc = pq(html)
|
||||
names = doc('.item .name')
|
||||
|
||||
for name_html in names.items():
|
||||
name = parse_name_whole(name_html)
|
||||
print(name)
|
||||
browser.close()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 13:27
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 13:34
|
||||
@Usage : playwright基本使用
|
||||
@Desc :
|
||||
@参考:https://github.dev/Python3WebSpider/PlaywrightTest
|
||||
'''
|
||||
|
||||
# playwright既支持Pyppetter的异步模式,又支持selenium的同步模式
|
||||
import asyncio
|
||||
# 同步模式
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
with sync_playwright() as p:
|
||||
for browser_type in [p.chromium, p.firefox, p.webkit]:
|
||||
browser = browser_type.launch(headless=False)
|
||||
page = browser.new_page()
|
||||
page.goto('https://www.baidu.com')
|
||||
page.screenshot(path=f'screenshot-{browser_type.name}.png')
|
||||
print(page.title())
|
||||
browser.close()
|
||||
|
||||
|
||||
# 异步模式
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
|
||||
async def main():
    """Async mode: open Baidu in Chromium, Firefox and WebKit in turn.

    For each engine a screenshot named after the engine is saved and the
    page title is printed before the browser is closed.
    """
    async with async_playwright() as p:
        for engine in (p.chromium, p.firefox, p.webkit):
            browser = await engine.launch(headless=False)
            page = await browser.new_page()
            await page.goto('https://www.baidu.com')
            await page.screenshot(path=f'screenshot-{engine.name}.png')
            title = await page.title()
            print(title)
            await browser.close()
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 14:00
|
||||
@Usage :
|
||||
@Desc :playWright有一个强大的功能,是可以录制我们在浏览器中的操作并自动生成代码
|
||||
'''
|
||||
|
||||
from playwright.sync_api import Playwright, sync_playwright, expect
|
||||
|
||||
|
||||
def run(playwright: Playwright) -> None:
    """Replay a recorded Baidu session in Firefox: search "python", then "nba"."""
    browser = playwright.firefox.launch(headless=False)
    # A context is used instead of the browser directly so that each context
    # is an independent, resource-isolated environment.
    context = browser.new_context()
    page = context.new_page()
    page.goto("https://www.baidu.com/")

    search_box = page.locator("#kw")
    submit = page.get_by_role("button", name="百度一下")

    search_box.click()
    search_box.fill("python")
    # The recording contains two consecutive clicks on the search button.
    submit.click()
    submit.click()
    search_box.click()
    search_box.fill("nba")
    submit.click()
    page.close()

    # ---------------------
    context.close()
    browser.close()
|
||||
|
||||
|
||||
with sync_playwright() as playwright:
|
||||
run(playwright)
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 14:40
|
||||
@Usage :
|
||||
@Desc :playwright还支持移动端浏览器
|
||||
'''
|
||||
|
||||
import time
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# 模拟打开iPhone 12 Pro Max的safari浏览器
|
||||
with sync_playwright() as p:
|
||||
iphone_12_pro_max = p.devices['iPhone 12 Pro Max']
|
||||
browser = p.webkit.launch(headless=False)
|
||||
context = browser.new_context(
|
||||
**iphone_12_pro_max,
|
||||
locale='zh-CN',
|
||||
)
|
||||
page = context.new_page()
|
||||
page.goto('https://www.whatismybrowser.com/')
|
||||
# 等待页面的某个状态完成,这里传入的state是networkidle,表示网络空闲状态
|
||||
page.wait_for_load_state(state='networkidle')
|
||||
page.screenshot(path='browser-info.png')
|
||||
time.sleep(10)
|
||||
browser.close()
|
||||
|
||||
|
||||
with sync_playwright() as p:
|
||||
iphone_12_pro_max = p.devices['iPhone 12 Pro Max']
|
||||
browser = p.webkit.launch(headless=False)
|
||||
context = browser.new_context(
|
||||
**iphone_12_pro_max,
|
||||
locale='zh-CN',
|
||||
geolocation={'longitude': 116.39014, 'latitude': 39.913904},
|
||||
permissions=['geolocation']
|
||||
)
|
||||
page = context.new_page()
|
||||
page.goto('https://amap.com')
|
||||
page.wait_for_load_state(state='networkidle')
|
||||
page.screenshot(path='location-iphone.png')
|
||||
time.sleep(10)
|
||||
browser.close()
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 15:12
|
||||
@Usage :
|
||||
@Desc : playwright常用操作
|
||||
'''
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
|
||||
# 事件监听
|
||||
def on_response(response):
    """Response event listener: print the status code and URL of ``response``."""
    # Label corrected from the misspelled "Statue".
    print(f'Status {response.status}: {response.url}')
|
||||
|
||||
|
||||
# 截获ajax命令
|
||||
def on_response1(response):
    """Event handler: print the JSON body of successful movie-API responses.

    Responses whose URL does not contain '/api/movie/' or whose status is
    not 200 are ignored.

    :param response: object exposing ``url``, ``status`` and ``json()``.
    """
    if '/api/movie/' not in response.url:
        return
    if response.status != 200:
        return
    print(response.json())
|
||||
|
||||
|
||||
# Register a response listener, then load the page so the handler fires.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    # Subscribe to the page's 'response' event; the alternative handler is left disabled.
    # page.on('response', on_response)
    page.on('response', on_response1)
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    browser.close()
|
||||
|
||||
# Fetch the page source.
# (This heading was a bare identifier without '#', which raised NameError at runtime.)
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    # page.content() returns the page's HTML as a string.
    html = page.content()
    print(html)
    browser.close()
|
||||
|
||||
# Read an attribute from a single node.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    # 'a.name' selects <a> nodes with class "name"; the second argument
    # names the attribute to read (here the link target).
    href = page.get_attribute('a.name', 'href')
    print(href)
    browser.close()
|
||||
|
||||
# Read several nodes at once.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://spa6.scrape.center/')
    page.wait_for_load_state('networkidle')
    # Collect every node matching 'a.name', then print its href and text.
    elements = page.query_selector_all('a.name')
    for element in elements:
        print(element.get_attribute('href'))
        print(element.text_content())
    browser.close()
|
||||
|
||||
# Request interception: abort requests for images.
import re

with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()

    def cancel_request(route, request):
        """Route handler that aborts the intercepted request."""
        route.abort()

    # Requests whose URL matches the .png / .jpg pattern go to the aborting handler.
    image_url_pattern = re.compile(r"(\.png)|(\.jpg)")
    page.route(image_url_pattern, cancel_request)
    page.goto("https://spa6.scrape.center/")
    page.wait_for_load_state("networkidle")
    page.screenshot(path='no_picture.png')
    browser.close()
|
||||
|
||||
# Intercept a request and answer it with our own content.
import time

with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)
    page = browser.new_page()

    def modify_response(route, request):
        """Route handler that fulfils the request from a local HTML file."""
        route.fulfill(path="./custom_response.html")

    # Register the handler for the '/' URL pattern before navigating.
    page.route('/', modify_response)
    page.goto("https://spa6.scrape.center/")
    # Pause for 10 seconds before closing the browser.
    time.sleep(10)
    browser.close()
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
from playwright.sync_api import Playwright, sync_playwright, expect
|
||||
|
||||
|
||||
def run(playwright: Playwright) -> None:
    """Replay a recorded Baidu search session in a headed Firefox.

    Opens baidu.com, searches for "python" and then "nba", and finally
    closes the page, the context and the browser.

    :param playwright: the Playwright instance used to launch Firefox.
    """
    browser = playwright.firefox.launch(headless=False)
    context = browser.new_context()
    page = context.new_page()
    page.goto("https://www.baidu.com/")

    # Build the two locators once and reuse them for every step.
    search_box = page.locator("#kw")
    search_button = page.get_by_role("button", name="百度一下")

    search_box.click()
    search_box.fill("python")
    # The recording clicks the search button twice in a row; both clicks are kept.
    search_button.click()
    search_button.click()

    search_box.click()
    search_box.fill("nba")
    search_button.click()
    page.close()

    # ---------------------
    context.close()
    browser.close()
|
||||
|
||||
|
||||
# Entry point: start Playwright and replay the recorded session.
with sync_playwright() as pw:
    run(pw)
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 19:53
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 19:53
|
||||
@Usage :
|
||||
@Desc : selenium基本用法
|
||||
@参考: https://github.dev/Python3WebSpider/SeleniumTest
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    # Legacy form: browser.find_element_by_id('kw'); Selenium 4.0+ uses the call below.
    # Named search_box rather than `input` so the builtin is not shadowed.
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    # Wait up to 10 seconds for the element with id 'content_left' to be present.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
finally:
    # quit() ends the whole WebDriver session; close() only closes the current window.
    browser.quit()
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 21:11
|
||||
@Usage : 对Cookie进行操作
|
||||
@Desc :获取,添加,删除cookie
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
|
||||
browser = webdriver.Chrome()
try:
    browser.get('https://www.zhihu.com/explore')
    # Cookies as first received from the site.
    print(browser.get_cookies())
    # Add one cookie, then print the cookie list again.
    browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
    print(browser.get_cookies())
    # Delete every cookie and print the (now cleared) list.
    browser.delete_all_cookies()
    print(browser.get_cookies())
finally:
    # The script only prints data, so always shut the driver session down afterwards.
    browser.quit()
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 21:14
|
||||
@Usage : 选项卡管理
|
||||
@Desc : 访问页面的时候会开启一个个选项卡
|
||||
'''
|
||||
|
||||
import time
|
||||
from selenium import webdriver
|
||||
|
||||
browser = webdriver.Chrome()
browser.get('https://www.baidu.com')

# Open a new tab through JavaScript and list the window handles.
browser.execute_script('window.open()')
print(browser.window_handles)

# Switch to the second handle and load a page there.
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)

# Switch back to the first handle and navigate it elsewhere.
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 21:17
|
||||
@Usage : 异常处理
|
||||
@Desc : 可能会遇到获取节点失败的异常,可以对异常进行处理
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
||||
|
||||
browser = webdriver.Chrome()

# Navigation: report a timeout instead of letting it propagate.
try:
    browser.get('https://www.baidu.com')
except TimeoutException:
    print('Time out')

# Lookup of an id that is expected to be missing; the window is closed either way.
try:
    browser.find_element(By.ID, 'hello')
except NoSuchElementException:
    print('No Such Element')
finally:
    browser.close()
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 21:20
|
||||
@Usage : 反屏蔽
|
||||
@Desc : 现在很多网站增加了对Selenium的监测,如果检测到Selenium打开浏览器就直接屏蔽
|
||||
基本原理是监测当前浏览器窗口下的window.navigator对象中是否包含webdriver属性。
|
||||
正常使用浏览器这个属性应该是undefined,一旦使用了Selenium,就会给window.navigator设置webdriver属性
|
||||
https://antispider1.scrape.center/ 就是使用了上述原理
|
||||
'''
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import ChromeOptions
|
||||
|
||||
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
# Author's note: this approach does not work, because the script runs only
# after a page has finished loading, while the site checks before rendering.
browser.execute_script('Object.defineProperty(navigator, "webdriver", {get: () => undefined})')
browser.get('https://antispider1.scrape.center/')


# Use CDP (Chrome DevTools Protocol) instead: register a script that is
# evaluated on every new document, hiding navigator.webdriver up front.
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
    'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
})
# Same target as the first attempt and the module docstring (previously the
# older scrape.cuiqingcai.com host), so both approaches hit the same site.
browser.get('https://antispider1.scrape.center/')
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 21:31
|
||||
@Usage : 无头模式
|
||||
@Desc : 之前的案例运行时,总会弹出一个浏览器窗口
|
||||
现在已经支持无头模式Headless
|
||||
'''
|
||||
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import ChromeOptions
|
||||
|
||||
option = ChromeOptions()
option.add_argument('--headless')
browser = webdriver.Chrome(options=option)
try:
    browser.set_window_size(1366, 768)
    browser.get('https://www.baidu.com')
    browser.get_screenshot_as_file('preview.png')
finally:
    # A headless browser has no window to close by hand, so always end the session here.
    browser.quit()
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 19:59
|
||||
@Usage :
|
||||
@Desc :selenium访问页面与查找节点
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')

# Three lookups with different strategies, all using the id 'q'.
input_first = browser.find_element(By.ID, 'q')
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
print(input_first, input_second, input_third)

# Multiple nodes: find_elements returns every match.
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
print(lis)
browser.close()
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:04
|
||||
@Usage :
|
||||
@Desc :selenium节点交互 驱动浏览器实现一些操作
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
import time
|
||||
|
||||
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')

# Named search_box rather than `input` so the builtin is not shadowed.
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPhone')  # type text
time.sleep(1)
search_box.clear()  # clear the text
search_box.send_keys('iPad')

button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()  # click the search button
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:08
|
||||
@Usage :
|
||||
@Desc :selenium动作链 一系列动作连续执行
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import ActionChains
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)

# Switch into the frame named 'iframeResult' before looking up the two nodes.
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')

# Queue the drag-and-drop on the chain, then execute the queued actions.
actions = ActionChains(browser).drag_and_drop(source, target)
actions.perform()
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:14
|
||||
@Usage :
|
||||
@Desc :selenium运行JavaScript,有一些操作selenium没有提供API,这时可以直接通过运行JavaScript实现
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
import time
|
||||
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# browser.get('https://www.taobao.com')

# Scroll to the very bottom of the page.
scroll_to_bottom = 'window.scrollTo(0, document.body.scrollHeight)'
browser.execute_script(scroll_to_bottom)

# Pop up an alert box.
show_alert = 'alert("To Bottom")'
browser.execute_script(show_alert)

time.sleep(5)
browser.close()
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:20
|
||||
@Usage :
|
||||
@Desc :获取节点信息
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
browser = webdriver.Chrome()
url = 'https://spa2.scrape.center/'
browser.get(url)

logo = browser.find_element(By.CLASS_NAME, 'logo-image')
print(logo)
# Attribute value.
print(logo.get_attribute('src'))

# Text content.
title = browser.find_element(By.CLASS_NAME, 'logo-title')
print(title.text)

# id, location, tag name and size, printed in that order.
for detail in (title.id, title.location, title.tag_name, title.size):
    print(detail)
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:31
|
||||
@Usage : 切换Frame
|
||||
@Desc : 网页中有一种节点叫iframe,相当于页面的子页面,
|
||||
selenium打开一个页面后,默认是在父Frame里面操作,这时需要使用switch_to.frame方法切换
|
||||
'''
|
||||
import time
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)

# First look for the 'logo' class from inside the 'iframeResult' frame.
browser.switch_to.frame('iframeResult')
try:
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')

# Then switch back to the parent frame and repeat the lookup there.
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:38
|
||||
@Usage : 延时等待
|
||||
@Desc :get方法在网页框架加载结束后才会结束执行
|
||||
所以在get方法执行完毕之后其结果可能并不是浏览器完全加载完成的页面
|
||||
所以在必要时我们需要设置浏览器延时等待一段时间
|
||||
'''
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
browser = webdriver.Chrome()

# Implicit wait. Author's note: not very effective, because it only sets one
# fixed time while page load time depends on network conditions.
browser.implicitly_wait(10)
browser.get('https://spa2.scrape.center/')
# Named logo rather than `input` so the builtin is not shadowed.
logo = browser.find_element(By.CLASS_NAME, 'logo-image')
print(logo)


# Explicit wait: name the node to look for and a maximum waiting time.
# Selenium's documentation warns against mixing implicit and explicit waits
# (wait times become unpredictable), so the implicit wait is reset first.
browser.implicitly_wait(0)
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)
# presence_of_element_located: the node is present.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# element_to_be_clickable: the button can be clicked.
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(search_box, button)
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/6 20:38
|
||||
@Usage : 模拟浏览器前进后退功能
|
||||
@Desc :
|
||||
'''
|
||||
import time
|
||||
from selenium import webdriver
|
||||
|
||||
browser = webdriver.Chrome()

# Visit three pages in order to build up the history.
for page_url in ('https://www.baidu.com/', 'https://www.taobao.com/', 'https://www.python.org/'):
    browser.get(page_url)

# Go back one step.
browser.back()
time.sleep(1)
# Go forward again.
browser.forward()
browser.close()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 20:07
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 20:07
|
||||
@Usage :
|
||||
@Desc :字体反爬测试
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from pyquery import PyQuery as pq
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
browser = webdriver.Chrome()
browser.get('https://antispider4.scrape.center/')

# Wait up to 10 seconds for the '.item' nodes to be present.
wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.item')))

# Parse the page source with pyquery and walk over every '.item' node.
doc = pq(browser.page_source)
for item in doc('.item').items():
    name = item('.name').text()
    categories = [button.text() for button in item('.categories button').items()]
    # Score text is read directly from the '.score' node.
    score = item('.score').text()
    print(f'name: {name} categories: {categories} score: {score}')
browser.close()
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 20:20
|
||||
@Usage :
|
||||
@Desc :尝试解析对应的css源文件,来获取对应的我们想要的
|
||||
'''
|
||||
|
||||
import re
|
||||
import requests
|
||||
url = 'https://antispider4.scrape.center/css/app.654ba59e.css'

response = requests.get(url)
# Raw string: '\{' and '\}' are not valid string escapes in a normal literal.
# The leading dot is escaped so it matches a literal '.' only.
pattern = re.compile(r'\.icon-(.*?):before\{content:"(.*?)"\}')
results = re.findall(pattern, response.text)
# Map each icon key (first capture group) to its content value (second group).
icon_map = {item[0]: item[1] for item in results}
print(icon_map['789'])
print(icon_map['437'])
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/12/7 20:22
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
from selenium import webdriver
|
||||
from pyquery import PyQuery as pq
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
import re
|
||||
import requests
|
||||
url = 'https://antispider4.scrape.center/css/app.654ba59e.css'

response = requests.get(url)
# Raw string: '\{' and '\}' are not valid string escapes in a normal literal.
# The leading dot is escaped so it matches a literal '.' only.
pattern = re.compile(r'\.icon-(.*?):before\{content:"(.*?)"\}')
results = re.findall(pattern, response.text)
# Map each icon key (first capture group) to its content value (second group).
icon_map = {item[0]: item[1] for item in results}
|
||||
|
||||
|
||||
def parse_score(item):
    """Rebuild an item's score text from the classes of its '.icon' nodes.

    Each '.icon' node's class is searched for ``icon-<digits>``; the digits
    are looked up in the module-level ``icon_map`` and the mapped values are
    concatenated in document order.

    :param item: a pyquery selection for one item.
    :return: the concatenated values; nodes without an ``icon-<digits>``
        class and keys missing from ``icon_map`` contribute nothing.
    """
    icon_values = []
    for element in item('.icon').items():
        # attr() may give None when the class attribute is absent.
        class_name = element.attr('class') or ''
        # Raw string: '\d' is not a valid escape in a normal string literal.
        match = re.search(r'icon-(\d+)', class_name)
        if match is None:
            continue
        # Default to '' so that ''.join never receives None.
        icon_values.append(icon_map.get(match.group(1), ''))
    return ''.join(icon_values)
|
||||
|
||||
|
||||
browser = webdriver.Chrome()
browser.get('https://antispider4.scrape.center/')

# Wait up to 10 seconds for the '.item' nodes to be present.
wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.item')))

# Parse the page source with pyquery and walk over every '.item' node.
doc = pq(browser.page_source)
for item in doc('.item').items():
    name = item('.name').text()
    categories = [button.text() for button in item('.categories button').items()]
    # Score is rebuilt by parse_score instead of reading the node text.
    score = parse_score(item)
    print(f'name: {name} categories: {categories} score: {score}')
browser.close()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue