This commit is contained in:
markilue 2024-01-23 14:10:37 +08:00
commit 8d530054ae
4653 changed files with 61091 additions and 497 deletions

View File

@ -0,0 +1,20 @@
package com.atguigu.spark.core.rdd.operator.transform
import org.apache.spark.api.java.function.FilterFunction
/**
* @ClassName aaa.java
* @author dingjiawen@xiaomi.com
* @version 1.0.0
* @Description TODO
* @createTime 2023-07-19 18:44:00
*/
class AAA(bbb: Int) extends FilterFunction[Int] {
val cc = bbb
override def call(t: Int): Boolean = {
println(cc)
return true
}
}
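For context (not part of this commit): FilterFunction is the Java-friendly predicate interface accepted by Dataset.filter, so a class like AAA would typically be plugged in along these lines. A minimal Scala sketch, assuming a local SparkSession:
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("AAA-demo").getOrCreate()
import spark.implicits._
// AAA prints its captured value for every element and keeps all of them
val kept = spark.createDataset(Seq(1, 2, 3, 4)).filter(new AAA(10)).collect()
println(kept.mkString(","))  // 1,2,3,4
spark.stop()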

View File

@ -0,0 +1,18 @@
package com.atguigu.spark.core.rdd.operator.transform
/**
* @ClassName aaa.java
* @author dingjiawen@xiaomi.com
* @version 1.0.0
* @Description TODO
* @createTime 2023-07-19 18:44:00
*/
class CCC(bbb: Int) extends (Int => Boolean) with Serializable {
val cc = bbb
override def apply(t: Int): Boolean = {
println(cc)
return true
}
}
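For context (not part of this commit): because CCC extends Int => Boolean and mixes in Serializable, an instance can be handed directly to RDD.filter and shipped to the executors. A minimal Scala sketch, assuming a local SparkContext:
import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("CCC-demo"))
// CCC prints its captured value once per element and keeps every element
val kept = sc.makeRDD(List(1, 2, 3, 4)).filter(new CCC(10)).collect()
println(kept.mkString(","))  // 1,2,3,4
sc.stop()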

View File

@ -0,0 +1,69 @@
package com.atguigu.spark.core.rdd.operator.transform
import com.fasterxml.jackson.databind.ObjectMapper
import net.jpountz.xxhash.{XXHash64, XXHashFactory}
import org.apache.log4j.Logger
import org.apache.spark.api.java.function.FilterFunction
import java.nio.ByteBuffer
/**
* created by likunyi@xiaomi.com
* at 2022-04-29 15:32:00
* This operator samples the incoming records, keeping some and dropping the rest.
*
* samplingRate is the sampling rate and must be an integer:
* samplingRate = 10 means a 10% sampling rate,
* samplingRate = 64 means a 64% sampling rate.
* (A small usage sketch follows this class.)
*/
class HdfsToHolo_MessageSampler1(samplingRate: Int) extends FilterFunction[String] {
// @transient lazy vals are re-evaluated after deserialization on the executors,
// so these fields survive being shipped inside the filter function.
@transient private lazy val objectMapper: ObjectMapper = new ObjectMapper()
// reduce samplingRate/100 by their greatest common divisor (e.g. 64% -> 16/25)
@transient private lazy val greatestCommonDivisor: Int = BigInt(samplingRate).gcd(BigInt(100)).toInt
@transient private lazy val NUMERATOR: Int = samplingRate / greatestCommonDivisor
@transient private lazy val DENOMINATOR: Int = 100 / greatestCommonDivisor
@transient private lazy val XXHASH_SEED: Long = 0x9747b28c
@transient private lazy val hasher: XXHash64 = XXHashFactory.fastestInstance().hash64()
@transient private lazy val logger: Logger = Logger.getLogger(this.getClass.getName)
// initialize the parameters (original open() kept below as a commented-out reference)
// def open(): Tuple5[ObjectMapper, Int, Int, Long, XXHash64] = {
//
// objectMapper = new ObjectMapper()
//
// val numerator = samplingRate // the numerator is the sampling rate
// val denominator = 100 // the denominator is always 100
// val greatestCommonDivisor = MathUtils.getGCD(numerator, denominator)
//
// NUMERATOR = numerator / greatestCommonDivisor
// DENOMINATOR = denominator / greatestCommonDivisor
// XXHASH_SEED = 0x9747b28c
// hasher = XXHashFactory.fastestInstance().hash64()
// (objectMapper,NUMERATOR,DENOMINATOR,XXHASH_SEED,hasher)
// } // open
override def call(input: String): Boolean = {
logger.info(s"所有参数【objectMapper${objectMapper}【NUMERATOR${NUMERATOR}【DENOMINATOR${DENOMINATOR}【hasher${hasher}")
logger.info(s"${input}")
// If no sampling is needed (the sampling rate is 100%), pass the record straight downstream
if (samplingRate == 100) {
return true
}
val currentMessage = objectMapper.readTree(input)
// If sampling is needed (the rate is not 100%), sample on the hash of distinct_id
if (hash(currentMessage.get("distinct_id").asText()) % DENOMINATOR < NUMERATOR) { // roughly: with a 64% rate, take the hash of distinct_id mod 100 and keep remainders 0-63, dropping 64-99
return true
} else {
return false
}
}
private def hash(distinct_id: String): Long = {
Math.abs(hasher.hash(ByteBuffer.wrap(distinct_id.getBytes("UTF-8")), XXHASH_SEED))
} // hash
}
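A rough illustration of the keep/drop rule above, as a standalone Scala sketch (the distinct_id value here is made up; the seed and the reduced 16/25 fraction correspond to samplingRate = 64):
import java.nio.ByteBuffer
import net.jpountz.xxhash.XXHashFactory

// For samplingRate = 64 the fraction 64/100 reduces to 16/25, so a record is kept
// when |xxhash64(distinct_id)| % 25 < 16, i.e. roughly 64% of distinct_id values.
val hasher = XXHashFactory.fastestInstance().hash64()
val distinctId = "user-123" // hypothetical distinct_id
val h = Math.abs(hasher.hash(ByteBuffer.wrap(distinctId.getBytes("UTF-8")), 0x9747b28c))
println(s"hash=$h keep=${h % 25 < 16}")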

View File

@ -1,5 +1,6 @@
package com.atguigu.spark.core.rdd.operator.transform
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object Spark01_RDD_Operator_Transform {
@ -11,7 +12,7 @@ object Spark01_RDD_Operator_Transform {
val sc =new SparkContext(sparkConf)
//TODO operator - map
val rdd = sc.makeRDD(
val rdd: RDD[Int] = sc.makeRDD(
List(1,2,3,4)
)

View File

@ -1,9 +1,5 @@
package com.atguigu.spark.core.rdd.operator.transform
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
@ -11,21 +7,25 @@ import org.apache.spark.{SparkConf, SparkContext}
*/
object Spark08_RDD_Operator_Transform {
def main(args: Array[String]): Unit = {
def main(args: Array[String]): Unit = {
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
//create the context environment object
val sc =new SparkContext(sparkConf)
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
//create the context environment object
val sc = new SparkContext(sparkConf)
//TODO operator - filter
val rdd = sc.makeRDD(
List(1,2,3,4)
)
//TODO operator - filter
val rdd = sc.makeRDD(
List(1, 2, 3, 4)
)
val filterRDD = rdd.filter(_ % 2 == 0)
val filterRDD = rdd.filter(
new CCC(10)
)
filterRDD.collect().foreach(println(_))
// val filterRDD = rdd.filter(_ % 2 == 0)
sc.stop()
}
filterRDD.collect().foreach(println(_))
sc.stop()
}
}

View File

@ -2,7 +2,7 @@ package com.atguigu.spark.streaming
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.dstream.{InputDStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import scala.collection.mutable
@ -27,7 +27,7 @@ object SparkStreaming02_Queue {
val rddQueue = new mutable.Queue[RDD[Int]]()
//4. Create the QueueInputDStream
val inputStream = ssc.queueStream(rddQueue,oneAtATime = false)
val inputStream: InputDStream[Int] = ssc.queueStream(rddQueue,oneAtATime = false)
//5. Process the RDD data in the queue

View File

@ -29,10 +29,10 @@ object SparkStreaming06_state_transform {
//Difference from calling map directly: where the code is written determines where it runs
//Driver side
val newDS: DStream[String] = lines.transform(
rdd =>{
rdd => {
//code: Driver side (executed periodically, once per batch)
rdd.map(
str =>{
str => {
//code: Executor side
str
}
@ -42,7 +42,7 @@ object SparkStreaming06_state_transform {
//code: Driver side
val newDS1: DStream[String] = lines.map(
str =>{
str => {
//code: Executor side (not executed periodically on the Driver)
str
}

View File

@ -0,0 +1,102 @@
package com.atguigu.gmall.realtime.app.dwm
import scala.collection.mutable
/**
* @ClassName GenerateRPN.java
* @author dingjiawen@xiaomi.com
* @version 1.0.0
* @Description TODO
* @createTime 2023-06-30 17:27:00
*/
object GenerateRPN {
def main(args: Array[String]): Unit = {
//test cases
//String str = "1+2*3-4*5-6+7*8-9"; //123*+45*-6-78*+9-
// var str = "1 + 2 * 3 - 4 * 5 - (6 + 7 * 8 - 9)"; //123*+45*-6-78*+9-
// var str = "5 + 2 * 3"; //123*+45*-6-78*+9-
var str = "6 * ( 5 + ( 2 + 3 ) * 8 + 3 )"; //6523+8*+3+*
var RPNStack: Array[String] = generateRPN(str)
println(RPNStack.mkString(","))
println(evalRPN(RPNStack))
}
def generateRPN(expression: String): Array[String] = {
val precedence = Map("+" -> 1, "-" -> 1, "*" -> 2, "/" -> 2)
val output = mutable.Stack[String]()
val stack = mutable.Stack[String]()
def isOperator(token: String): Boolean = {
  precedence.contains(token)
}
def hasHigherPrecedence(op1: String, op2: String): Boolean = {
precedence(op1) >= precedence(op2)
}
def processOperator(op: String): Unit = {
while (stack.nonEmpty && isOperator(stack.top) && hasHigherPrecedence(stack.top, op)) {
output.push(stack.pop())
}
stack.push(op)
}
def processOperand(operand: String): Unit = {
output.push(operand)
}
def processParenthesis(): Unit = {
while (stack.nonEmpty && stack.top != "(") {
output.push(stack.pop())
}
stack.pop() // discard the left parenthesis
}
for (token <- expression.split("\\s+")) {
token match {
case "(" => stack.push(token)
case ")" => processParenthesis()
case t if isOperator(t) => processOperator(t)
case _ => processOperand(token)
}
}
while (stack.nonEmpty) {
output.push(stack.pop())
}
output.toArray.reverse
}
def evalRPN(tokens: Array[String]): Int = {
val stack = mutable.Stack[Int]()
for (token <- tokens) {
if (isOperator(token)) {
val operand2 = stack.pop()
val operand1 = stack.pop()
val result = performOperation(token, operand1, operand2)
stack.push(result)
} else {
stack.push(token.toInt)
}
}
stack.pop()
}
def isOperator(token: String): Boolean = {
token == "+" || token == "-" || token == "*" || token == "/"
}
def performOperation(operator: String, operand1: Int, operand2: Int): Int = {
operator match {
case "+" => operand1 + operand2
case "-" => operand1 - operand2
case "*" => operand1 * operand2
case "/" => operand1 / operand2
}
}
}
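A quick sanity check of the two entry points (tokens must be space-separated, since generateRPN splits on whitespace):
val rpn = GenerateRPN.generateRPN("6 * ( 5 + ( 2 + 3 ) * 8 + 3 )")
println(rpn.mkString(" "))        // 6 5 2 3 + 8 * + 3 + *
println(GenerateRPN.evalRPN(rpn)) // 6 * (5 + (2 + 3) * 8 + 3) = 6 * 48 = 288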

View File

@ -0,0 +1,10 @@
package com.atguigu.scala
class AAA(bbb:BBB) {
def open(): Unit = {
bbb.open()
println("aaaOpen")
}
}

View File

@ -0,0 +1,15 @@
package com.atguigu.scala
class BBB {
def this(a:Int,b:Int) {
this()
println(a)
println(b)
}
def open() ={
println("open")
}
}

View File

@ -1,6 +1,6 @@
package com.atguigu.scala.chapter03
import com.atguigu.scala.test.User
import com.atguigu.scala.test1.User
object Scala02_Oper {

View File

@ -1,6 +1,6 @@
package com.atguigu.scala.chapter06
import com.atguigu.scala.test.ScalaUser
import com.atguigu.scala.test1.ScalaUser
object Scala09_Object_Instance_4 {

View File

@ -1,6 +1,6 @@
package com.atguigu.scala.chapter06
import com.atguigu.scala.test.ScalaUser
import com.atguigu.scala.test1.ScalaUser
object Scala09_Object_Instance_5 {

View File

@ -14,7 +14,7 @@ object Scala05_Transform extends Parent with MyTrait {
//TODO 3. Trait or companion object
//TODO 4. Declared elsewhere (package object)
//TODO 5. Direct import
import com.atguigu.scala.test.TestTransform._
import com.atguigu.scala.test1.TestTransform._
val user=new User()
user.insertUser()
user.updateUser()

View File

@ -0,0 +1,9 @@
package com.atguigu.scala
object test111 {
def main(args: Array[String]): Unit = {
new AAA(new BBB(1,2)).open()
}
}

View File

@ -185,6 +185,7 @@ public class DataServiceImplTest {
assertEquals(tagName1, result.get(0).getTagName());
assertEquals(tagName2, result.get(1).getTagName());
// if (result.get(0).getTagName().equals(tagName1) && result.get(1).getTagName().equals(tagName2)) {
// DataSample[] datasamples1 = result.get(0).getSamples();
// DataSample[] datasamples2 = result.get(1).getSamples();

View File

@ -1,264 +0,0 @@
#
# A fatal error has been detected by the Java Runtime Environment:
#
# EXCEPTION_ACCESS_VIOLATION (0xc0000005) at pc=0x000000006abea148, pid=12532, tid=0x0000000000004e18
#
# JRE version: Java(TM) SE Runtime Environment (8.0_311-b11) (build 1.8.0_311-b11)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.311-b11 mixed mode windows-amd64 compressed oops)
# Problematic frame:
# V [jvm.dll+0x19a148]
#
# Failed to write core dump. Minidumps are not enabled by default on client versions of Windows
#
# If you would like to submit a bug report, please visit:
# http://bugreport.java.com/bugreport/crash.jsp
#
--------------- T H R E A D ---------------
Current thread (0x000002d51ddc9000): JavaThread "JDWP Transport Listener: dt_socket" daemon [_thread_in_vm, id=19992, stack(0x000000ec02500000,0x000000ec02600000)]
siginfo: ExceptionCode=0xc0000005, reading address 0x000002d51d8e2018
Registers:
RAX=0x000002d51d7786d0, RBX=0x0000000000000003, RCX=0x000002d51d8e2008, RDX=0x000002d51d779200
RSP=0x000000ec025ff7b0, RBP=0x000000ec025ff829, RSI=0x00000000000000b6, RDI=0x000002d51d779268
R8 =0x000002d51d779aa8, R9 =0x00007ffedf820000, R10=0x000002d51d779211, R11=0x000002d51fea87d9
R12=0x000002d51fea87d8, R13=0x000000ec025ff8b0, R14=0x000000000000005b, R15=0x00000000000000b6
RIP=0x000000006abea148, EFLAGS=0x0000000000010202
Top of Stack: (sp=0x000000ec025ff7b0)
0x000000ec025ff7b0: 000002d51ddc9000 00000000000000b6
0x000000ec025ff7c0: 0000000000000003 000002d51ddc9000
0x000000ec025ff7d0: 000002d51d779268 000002d51d779268
0x000000ec025ff7e0: 000002d51d779268 000002d51d779268
0x000000ec025ff7f0: 000002d51ddc9000 000002d51d779268
0x000000ec025ff800: 000002d51ddc9000 000002d51d779268
0x000000ec025ff810: 000002d51ddc9000 0000005b00000058
0x000000ec025ff820: 000000b600000072 000000006ac50000
0x000000ec025ff830: 00000000000000b6 0000000000000000
0x000000ec025ff840: 0000000000000000 0000000000000072
0x000000ec025ff850: 000000ec025ff9c0 0000000000000000
0x000000ec025ff860: 0000000000000000 000000ec025ff9c8
0x000000ec025ff870: 000002d51ddc9000 000002d51d779268
0x000000ec025ff880: 0000000000000000 000000006abef64f
0x000000ec025ff890: 000000ec025ff8b0 000002d51fea87d8
0x000000ec025ff8a0: 000002d507c50a01 000002d51d779268
Instructions: (pc=0x000000006abea148)
0x000000006abea128: 10 84 d2 74 0b 41 8b 45 31 f7 d0 48 63 c8 eb 05
0x000000006abea138: 41 0f b7 4d 31 4c 8b 6d 67 48 c1 e1 05 49 03 c8
0x000000006abea148: 48 8b 49 10 44 8b 75 f3 0f b6 c1 66 c1 e0 08 66
0x000000006abea158: c1 e9 08 66 0b c1 66 41 89 44 24 01 84 d2 0f 84
Register to memory mapping:
RAX=0x000002d51d7786d0 is pointing into metadata
RBX=0x0000000000000003 is an unknown value
RCX=0x000002d51d8e2008 is an unknown value
RDX=0x000002d51d779200 is pointing into metadata
RSP=0x000000ec025ff7b0 is pointing into the stack for thread: 0x000002d51ddc9000
RBP=0x000000ec025ff829 is pointing into the stack for thread: 0x000002d51ddc9000
RSI=0x00000000000000b6 is an unknown value
RDI={method} {0x000002d51d779270} 'test' '()V' in 'com/markilue/leecode/listnode/MyLinkedList'
R8 =0x000002d51d779aa8 is pointing into metadata
R9 =0x00007ffedf820000 is an unknown value
R10=0x000002d51d779211 is pointing into metadata
R11=0x000002d51fea87d9 is an unknown value
R12=0x000002d51fea87d8 is an unknown value
R13=0x000000ec025ff8b0 is pointing into the stack for thread: 0x000002d51ddc9000
R14=0x000000000000005b is an unknown value
R15=0x00000000000000b6 is an unknown value
Stack: [0x000000ec02500000,0x000000ec02600000], sp=0x000000ec025ff7b0, free space=1021k
Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
V [jvm.dll+0x19a148]
V [jvm.dll+0x19f64f]
V [jvm.dll+0x3408eb]
C [jdwp.dll+0x4296]
C [jdwp.dll+0xef91]
C [jdwp.dll+0x1f4f5]
C [jdwp.dll+0x1f45e]
V [jvm.dll+0x1ba3aa]
V [jvm.dll+0x23df22]
V [jvm.dll+0x29253c]
C [ucrtbase.dll+0x21bb2]
C [KERNEL32.DLL+0x17034]
C [ntdll.dll+0x52651]
--------------- P R O C E S S ---------------
Java Threads: ( => current thread )
0x000002d51fc40800 JavaThread "Service Thread" daemon [_thread_blocked, id=21716, stack(0x000000ec02c00000,0x000000ec02d00000)]
0x000002d51fba9000 JavaThread "C1 CompilerThread3" daemon [_thread_blocked, id=14316, stack(0x000000ec02b00000,0x000000ec02c00000)]
0x000002d51fb9e800 JavaThread "C2 CompilerThread2" daemon [_thread_blocked, id=19840, stack(0x000000ec02a00000,0x000000ec02b00000)]
0x000002d51fb9d800 JavaThread "C2 CompilerThread1" daemon [_thread_blocked, id=16824, stack(0x000000ec02900000,0x000000ec02a00000)]
0x000002d51fb9b000 JavaThread "C2 CompilerThread0" daemon [_thread_blocked, id=12300, stack(0x000000ec02800000,0x000000ec02900000)]
0x000002d51faf3800 JavaThread "JDWP Command Reader" daemon [_thread_in_native, id=2456, stack(0x000000ec02700000,0x000000ec02800000)]
0x000002d51faf0800 JavaThread "JDWP Event Helper Thread" daemon [_thread_blocked, id=9096, stack(0x000000ec02600000,0x000000ec02700000)]
=>0x000002d51ddc9000 JavaThread "JDWP Transport Listener: dt_socket" daemon [_thread_in_vm, id=19992, stack(0x000000ec02500000,0x000000ec02600000)]
0x000002d51ddbc000 JavaThread "Attach Listener" daemon [_thread_blocked, id=18836, stack(0x000000ec02400000,0x000000ec02500000)]
0x000002d51dd67800 JavaThread "Signal Dispatcher" daemon [_thread_blocked, id=21684, stack(0x000000ec02300000,0x000000ec02400000)]
0x000002d51dd39000 JavaThread "Finalizer" daemon [_thread_blocked, id=10800, stack(0x000000ec02200000,0x000000ec02300000)]
0x000002d51dd30800 JavaThread "Reference Handler" daemon [_thread_blocked, id=21884, stack(0x000000ec02100000,0x000000ec02200000)]
0x000002d507b9a800 JavaThread "main" [_thread_blocked, id=16780, stack(0x000000ec01700000,0x000000ec01800000)]
Other Threads:
0x000002d51dd06800 VMThread [stack: 0x000000ec02000000,0x000000ec02100000] [id=3664]
0x000002d51fc59000 WatcherThread [stack: 0x000000ec02d00000,0x000000ec02e00000] [id=15732]
VM state:not at safepoint (normal execution)
VM Mutex/Monitor currently owned by a thread: None
heap address: 0x0000000081c00000, size: 2020 MB, Compressed Oops mode: 32-bit
Narrow klass base: 0x0000000000000000, Narrow klass shift: 3
Compressed class space size: 1073741824 Address: 0x0000000100000000
Heap:
PSYoungGen total 38400K, used 10018K [0x00000000d5f00000, 0x00000000d8980000, 0x0000000100000000)
eden space 33280K, 30% used [0x00000000d5f00000,0x00000000d68c8bd8,0x00000000d7f80000)
from space 5120K, 0% used [0x00000000d8480000,0x00000000d8480000,0x00000000d8980000)
to space 5120K, 0% used [0x00000000d7f80000,0x00000000d7f80000,0x00000000d8480000)
ParOldGen total 87552K, used 0K [0x0000000081c00000, 0x0000000087180000, 0x00000000d5f00000)
object space 87552K, 0% used [0x0000000081c00000,0x0000000081c00000,0x0000000087180000)
Metaspace used 5032K, capacity 5348K, committed 5504K, reserved 1056768K
class space used 579K, capacity 595K, committed 640K, reserved 1048576K
Card table byte_map: [0x000002d518910000,0x000002d518d10000] byte_map_base: 0x000002d518502000
Marking Bits: (ParMarkBitMap*) 0x000000006b238030
Begin Bits: [0x000002d518fc0000, 0x000002d51af50000)
End Bits: [0x000002d51af50000, 0x000002d51cee0000)
Polling page: 0x000002d507cf0000
CodeCache: size=245760Kb used=1754Kb max_used=1771Kb free=244005Kb
bounds [0x000002d509550000, 0x000002d5097c0000, 0x000002d518550000]
total_blobs=523 nmethods=259 adapters=185
compilation: enabled
Compilation events (10 events):
Event: 0.989 Thread 0x000002d51fba9000 256 3 java.io.File::isInvalid (47 bytes)
Event: 0.989 Thread 0x000002d51fba9000 nmethod 256 0x000002d5096ffdd0 code [0x000002d5096fff40, 0x000002d509700390]
Event: 0.989 Thread 0x000002d51fb9e800 257 4 sun.misc.MetaIndex::mayContain (51 bytes)
Event: 0.991 Thread 0x000002d51fb9b000 nmethod 243 0x000002d509704390 code [0x000002d5097045c0, 0x000002d509705850]
Event: 0.994 Thread 0x000002d51fba9000 258 3 java.lang.Character::charCount (12 bytes)
Event: 0.994 Thread 0x000002d51fba9000 nmethod 258 0x000002d509704010 code [0x000002d509704160, 0x000002d5097042f8]
Event: 0.997 Thread 0x000002d51fb9e800 nmethod 257 0x000002d509706e10 code [0x000002d509706f60, 0x000002d509707498]
Event: 1.000 Thread 0x000002d51fba9000 259 1 java.nio.Buffer::limit (5 bytes)
Event: 1.000 Thread 0x000002d51fba9000 nmethod 259 0x000002d509703d50 code [0x000002d509703ea0, 0x000002d509703fb8]
Event: 1.017 Thread 0x000002d51fb9d800 nmethod 253 0x000002d50970a7d0 code [0x000002d50970aa80, 0x000002d50970c1a8]
GC Heap History (0 events):
No events
Deoptimization events (0 events):
No events
Classes redefined (6 events):
Event: 120.821 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.MyLinkedList, count=1
Event: 120.822 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.ListNode, count=1
Event: 164.527 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.MyLinkedList, count=2
Event: 164.528 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.ListNode, count=2
Event: 308.152 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.MyLinkedList, count=3
Event: 308.152 Thread 0x000002d51dd06800 redefined class name=com.markilue.leecode.listnode.ListNode, count=3
Internal exceptions (7 events):
Event: 0.110 Thread 0x000002d507b9a800 Exception <a 'java/lang/NoSuchMethodError': Method sun.misc.Unsafe.defineClass(Ljava/lang/String;[BII)Ljava/lang/Class; name or signature does not match> (0x00000000d5f07cc0) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hot
Event: 0.110 Thread 0x000002d507b9a800 Exception <a 'java/lang/NoSuchMethodError': Method sun.misc.Unsafe.prefetchRead(Ljava/lang/Object;J)V name or signature does not match> (0x00000000d5f07fa8) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\
Event: 0.817 Thread 0x000002d507b9a800 Exception <a 'java/io/FileNotFoundException'> (0x00000000d62ad468) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jni.cpp, line 710]
Event: 0.848 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638d038) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
Event: 0.848 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638d430) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
Event: 0.849 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638fb28) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
Event: 0.849 Thread 0x000002d507b9a800 Exception <a 'java/security/PrivilegedActionException'> (0x00000000d638ff20) thrown at [C:\jenkins\workspace\8-2-build-windows-amd64-cygwin\jdk8u311\1894\hotspot\src\share\vm\prims\jvm.cpp, line 1523]
Events (10 events):
Event: 1527.523 Executing VM operation: GetOrSetLocal
Event: 1527.523 Executing VM operation: GetOrSetLocal done
Event: 1559.202 Executing VM operation: ChangeBreakpoints
Event: 1559.202 Executing VM operation: ChangeBreakpoints done
Event: 1559.805 Executing VM operation: ChangeBreakpoints
Event: 1559.805 Executing VM operation: ChangeBreakpoints done
Event: 1573.008 Executing VM operation: ChangeBreakpoints
Event: 1573.008 Executing VM operation: ChangeBreakpoints done
Event: 1581.175 Executing VM operation: ChangeBreakpoints
Event: 1581.176 Executing VM operation: ChangeBreakpoints done
Dynamic libraries:
0x00007ff61b6b0000 - 0x00007ff61b6f7000 E:\Java\JDK8\bin\java.exe
0x00007ffee8f90000 - 0x00007ffee9188000 C:\WINDOWS\SYSTEM32\ntdll.dll
0x00007ffee7cf0000 - 0x00007ffee7dad000 C:\WINDOWS\System32\KERNEL32.DLL
0x00007ffee6a20000 - 0x00007ffee6cee000 C:\WINDOWS\System32\KERNELBASE.dll
0x00007ffee8050000 - 0x00007ffee80fe000 C:\WINDOWS\System32\ADVAPI32.dll
0x00007ffee7560000 - 0x00007ffee75fe000 C:\WINDOWS\System32\msvcrt.dll
0x00007ffee8dd0000 - 0x00007ffee8e6c000 C:\WINDOWS\System32\sechost.dll
0x00007ffee8990000 - 0x00007ffee8ab5000 C:\WINDOWS\System32\RPCRT4.dll
0x00007ffee7eb0000 - 0x00007ffee8050000 C:\WINDOWS\System32\USER32.dll
0x00007ffee6880000 - 0x00007ffee68a2000 C:\WINDOWS\System32\win32u.dll
0x00007ffee8170000 - 0x00007ffee819a000 C:\WINDOWS\System32\GDI32.dll
0x00007ffee6e40000 - 0x00007ffee6f4b000 C:\WINDOWS\System32\gdi32full.dll
0x00007ffee6f50000 - 0x00007ffee6fed000 C:\WINDOWS\System32\msvcp_win.dll
0x00007ffee6cf0000 - 0x00007ffee6df0000 C:\WINDOWS\System32\ucrtbase.dll
0x00007ffed59c0000 - 0x00007ffed5c5a000 C:\WINDOWS\WinSxS\amd64_microsoft.windows.common-controls_6595b64144ccf1df_6.0.19041.1110_none_60b5254171f9507e\COMCTL32.dll
0x00007ffee88f0000 - 0x00007ffee8920000 C:\WINDOWS\System32\IMM32.DLL
0x00007ffedf820000 - 0x00007ffedf835000 E:\Java\JDK8\jre\bin\vcruntime140.dll
0x00007ffebd920000 - 0x00007ffebd9bb000 E:\Java\JDK8\jre\bin\msvcp140.dll
0x000000006aa50000 - 0x000000006b2b0000 E:\Java\JDK8\jre\bin\server\jvm.dll
0x00007ffee7ce0000 - 0x00007ffee7ce8000 C:\WINDOWS\System32\PSAPI.DLL
0x00007ffed2a20000 - 0x00007ffed2a29000 C:\WINDOWS\SYSTEM32\WSOCK32.dll
0x00007ffed5e10000 - 0x00007ffed5e37000 C:\WINDOWS\SYSTEM32\WINMM.dll
0x00007ffedfb10000 - 0x00007ffedfb1a000 C:\WINDOWS\SYSTEM32\VERSION.dll
0x00007ffee8100000 - 0x00007ffee816b000 C:\WINDOWS\System32\WS2_32.dll
0x00007ffee4f90000 - 0x00007ffee4fa2000 C:\WINDOWS\SYSTEM32\kernel.appcore.dll
0x00007ffee0460000 - 0x00007ffee0470000 E:\Java\JDK8\jre\bin\verify.dll
0x00007ffedf7f0000 - 0x00007ffedf81b000 E:\Java\JDK8\jre\bin\java.dll
0x00007ffedb7d0000 - 0x00007ffedb806000 E:\Java\JDK8\jre\bin\jdwp.dll
0x00007ffee1c90000 - 0x00007ffee1c99000 E:\Java\JDK8\jre\bin\npt.dll
0x00007ffedf8f0000 - 0x00007ffedf920000 E:\Java\JDK8\jre\bin\instrument.dll
0x00007ffedef10000 - 0x00007ffedef28000 E:\Java\JDK8\jre\bin\zip.dll
0x00007ffee81a0000 - 0x00007ffee88e4000 C:\WINDOWS\System32\SHELL32.dll
0x00007ffee4790000 - 0x00007ffee4f24000 C:\WINDOWS\SYSTEM32\windows.storage.dll
0x00007ffee6ff0000 - 0x00007ffee7344000 C:\WINDOWS\System32\combase.dll
0x00007ffee6110000 - 0x00007ffee6140000 C:\WINDOWS\SYSTEM32\Wldp.dll
0x00007ffee7a70000 - 0x00007ffee7b1d000 C:\WINDOWS\System32\SHCORE.dll
0x00007ffee7e30000 - 0x00007ffee7e85000 C:\WINDOWS\System32\shlwapi.dll
0x00007ffee65f0000 - 0x00007ffee660f000 C:\WINDOWS\SYSTEM32\profapi.dll
0x00007ffedf850000 - 0x00007ffedf85a000 E:\Java\JDK8\jre\bin\dt_socket.dll
0x00007ffee5e70000 - 0x00007ffee5eda000 C:\WINDOWS\system32\mswsock.dll
0x00007ffee44a0000 - 0x00007ffee4684000 C:\WINDOWS\SYSTEM32\dbghelp.dll
0x00007ffee68e0000 - 0x00007ffee6962000 C:\WINDOWS\System32\bcryptPrimitives.dll
VM Arguments:
jvm_args: -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:5541,suspend=y,server=n -ea -Didea.test.cyclic.buffer.size=1048576 -javaagent:C:\Users\marklue\AppData\Local\JetBrains\IntelliJIdea2021.1\captureAgent\debugger-agent.jar -Dfile.encoding=UTF-8
java_command: com.intellij.rt.junit.JUnitStarter -ideVersion5 -junit4 com.markilue.leecode.listnode.MyLinkedList,test
java_class_path (initial): D:\software\JetBrains\IntelliJ IDEA 2021.1\lib\idea_rt.jar;D:\software\JetBrains\IntelliJ IDEA 2021.1\plugins\junit\lib\junit5-rt.jar;D:\software\JetBrains\IntelliJ IDEA 2021.1\plugins\junit\lib\junit-rt.jar;E:\Java\JDK8\jre\lib\charsets.jar;E:\Java\JDK8\jre\lib\deploy.jar;E:\Java\JDK8\jre\lib\ext\access-bridge-64.jar;E:\Java\JDK8\jre\lib\ext\cldrdata.jar;E:\Java\JDK8\jre\lib\ext\dnsns.jar;E:\Java\JDK8\jre\lib\ext\jaccess.jar;E:\Java\JDK8\jre\lib\ext\jfxrt.jar;E:\Java\JDK8\jre\lib\ext\localedata.jar;E:\Java\JDK8\jre\lib\ext\nashorn.jar;E:\Java\JDK8\jre\lib\ext\sunec.jar;E:\Java\JDK8\jre\lib\ext\sunjce_provider.jar;E:\Java\JDK8\jre\lib\ext\sunmscapi.jar;E:\Java\JDK8\jre\lib\ext\sunpkcs11.jar;E:\Java\JDK8\jre\lib\ext\zipfs.jar;E:\Java\JDK8\jre\lib\javaws.jar;E:\Java\JDK8\jre\lib\jce.jar;E:\Java\JDK8\jre\lib\jfr.jar;E:\Java\JDK8\jre\lib\jfxswt.jar;E:\Java\JDK8\jre\lib\jsse.jar;E:\Java\JDK8\jre\lib\management-agent.jar;E:\Java\JDK8\jre\lib\plugin.jar;E:\Java\JDK8\jre\lib\resources.jar;E:\Java\JDK8\jre\lib\rt.jar;D:\example\self_example\Leecode\target\classes;E:\maven\apache-maven-3.5.4-bin\RepMaven\org\projectlombok\lombok\1.18.24\lombok-1.18.24.jar;E:\maven\apache-maven-3.5.4-bin\RepMaven\junit\junit\4.13.2\junit-4.13.2.jar;E:\maven\apache-maven-3.5.4-bin\RepMaven\org\hamcrest\hamcrest-core\1.3\hamcrest-core-1.3.jar;C:\Users\marklue\AppData\Local\JetBrains\IntelliJIdea2021.1\captureAgent\debugger-agent.jar
Launcher Type: SUN_STANDARD
Environment Variables:
JAVA_HOME=E:\Java\JDK8
PATH=C:\WINDOWS\system32;C:\WINDOWS;C:\WINDOWS\System32\Wbem;C:\WINDOWS\System32\WindowsPowerShell\v1.0\;C:\WINDOWS\System32\OpenSSH\;D:\software\RAR解压工具\Bandizip\;D:\software\nodejs\;E:\Java\JDK8\bin;E:\maven\apache-maven-3.5.4-bin\apache-maven-3.5.4\bin;E:\scala\scala-2.12.11\bin;D:\software\anaconda\pkgs\python-3.7.11-h6244533_0;D:\software\anaconda\Scripts;D:\software\Git\Git\cmd;D:\software\nodejs;C:\Users\marklue\AppData\Local\Microsoft\WindowsApps;C:\Users\marklue\AppData\Roaming\npm;D:\software\JetBrains\PyCharm 2020.1\bin;
USERNAME=marklue
OS=Windows_NT
PROCESSOR_IDENTIFIER=Intel64 Family 6 Model 142 Stepping 10, GenuineIntel
--------------- S Y S T E M ---------------
OS: Windows 10.0 , 64 bit Build 19041 (10.0.19041.1806)
CPU:total 8 (initial active 8) (4 cores per cpu, 2 threads per core) family 6 model 142 stepping 10, cmov, cx8, fxsr, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, popcnt, avx, avx2, aes, clmul, erms, 3dnowpref, lzcnt, ht, tsc, tscinvbit, bmi1, bmi2, adx
Memory: 4k page, physical 8272104k(2053676k free), swap 11902816k(1707060k free)
vm_info: Java HotSpot(TM) 64-Bit Server VM (25.311-b11) for windows-amd64 JRE (1.8.0_311-b11), built on Sep 27 2021 05:15:14 by "java_re" with MS VC++ 15.9 (VS2017)
time: Mon Sep 5 12:31:48 2022
timezone: China Standard Time
elapsed time: 1581.277238 seconds (0d 0h 26m 21s)

View File

@ -1,18 +0,0 @@
package com.markilue.leecode;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode
*@Author: markilue
*@CreateTime: 2023-06-01 17:36
*@Description: TODO
*@Version: 1.0
*/
public class Test1 {
}

View File

@ -137,4 +137,62 @@ public class LC_1254_ClosedIsland {
return dfs(grid, i - 1, j) & dfs(grid, i + 1, j) & dfs(grid, i, j - 1) & dfs(grid, i, j + 1);
}
//What makes a closed island: it must be fully enclosed by a boundary (it never reaches the grid edge)
public int closedIsland3(int[][] grid) {
int result = 0;
for (int i = 0; i < grid.length; i++) {
for (int j = 0; j < grid[0].length; j++) {
if (grid[i][j] == 0) {
if (find(grid, i, j)) {
result++;
}
}
}
}
return result;
}
public boolean find(int[][] grid, int i, int j) {
if (i < 0 || j < 0 || i >= grid.length || j >= grid[0].length) {
return false;//reached the grid edge without being enclosed, so this is not a closed island
}
if (grid[i][j] == 1 || grid[i][j] == 2) {
//hit the boundary (land, or water already visited)
return true;
}
grid[i][j] = 2;
return find(grid, i + 1, j) & find(grid, i - 1, j) & find(grid, i, j + 1) & find(grid, i, j - 1);
}
public int closedIsland4(int[][] grid) {
int result = 0;
for (int i = 0; i < grid.length; i++) {
for (int j = 0; j < grid[0].length; j++) {
if (grid[i][j] == 0 && dfs2(grid, i, j)) {
result++;
}
}
}
return result;
}
private boolean dfs2(int[][] grid, int i, int j) {
//hitting a 1 means the island is still enclosed; going past the grid edge means it is not closed
if (i < 0 || j < 0 || i >= grid.length || j >= grid[0].length) {
return false;
}
if (grid[i][j] == 1) {
return true;
}
grid[i][j] = 1;
return dfs2(grid, i + 1, j) & dfs2(grid, i, j + 1) & dfs2(grid, i - 1, j) & dfs2(grid, i, j - 1);
}
}

View File

@ -143,6 +143,7 @@ public class LC_127_LadderLength {
adjacent.add(new ArrayList<>());
for (int i = 0; i < wordList.size(); ++i) {
String s = wordList.get(i);
//build the graph
for (int j = i + 1; j < wordList.size(); ++j) {
if (judge(s, wordList.get(j))) {
adjacent.get(i).add(j);
@ -153,6 +154,7 @@ public class LC_127_LadderLength {
return bfs(wordList.size() - 1, endIndex, adjacent, new boolean[wordList.size()]);
}
//i is the index of the start word; j is the index of the end word
private int bfs(int i, int j, List<List<Integer>> adjacent, boolean[] visited) {
int distance = 0;
ArrayDeque<Integer> queue = new ArrayDeque<>();
@ -163,8 +165,8 @@ public class LC_127_LadderLength {
for (int k = 0; k < size; ++k) {
int v = queue.pollFirst();
visited[v] = true;
if (v == j) return distance;
List<Integer> edges = adjacent.get(v);
if (v == j) return distance;//found it
List<Integer> edges = adjacent.get(v);//get its adjacency list and traverse each neighbour
for (int e : edges) {
if (!visited[e]) {
queue.addLast(e);

View File

@ -78,4 +78,26 @@ public class LC_82_DeleteDuplicatesII {
}
public ListNode deleteDuplicates3(ListNode head) {
if (head == null) return null;
ListNode fake = new ListNode();
fake.next = head;
ListNode temp = fake;
while (temp.next != null && temp.next.next != null) {
//only in this case are there duplicates that need to be removed
if (temp.next.val == temp.next.next.val) {
ListNode tempNext = temp.next;
while (tempNext.next != null && tempNext.val == tempNext.next.val) {
tempNext = tempNext.next;
}
temp.next = tempNext.next;
} else {
temp = temp.next;
}
}
return fake.next;
}
}

View File

@ -91,7 +91,7 @@ public class LC_503_NextGreaterElements {
stack.pop();
}
if (i < n) result[i] = stack.isEmpty() ? -1 : stack.peek();
stack.push(nums[i%n]);
stack.push(nums[i % n]);
}
return result;
@ -114,4 +114,25 @@ public class LC_503_NextGreaterElements {
return result;
}
public int[] nextGreaterElements3(int[] nums) {
int n = nums.length;
ArrayDeque<Integer> stack = new ArrayDeque<>();
int[] result = new int[n];
for (int i = n * 2 - 2; i >= 0; i--) {
while (!stack.isEmpty() && stack.peek() <= nums[i % n]) {//pop until the first number greater than the current one remains
stack.pop();
}
if (i < n) result[i] = stack.isEmpty() ? -1 : stack.peek();
stack.push(nums[i % n]);
}
return result;
}
}

View File

@ -1,5 +1,7 @@
package com.markilue.leecode.hot100.interviewHot.union_find.second;
import org.junit.Test;
import java.util.ArrayList;
/**
@ -12,6 +14,12 @@ import java.util.ArrayList;
*/
public class LC_685_FindRedundantConnection {
@Test
public void test() {
int[][] edges = {{1, 2}, {1, 3}, {2, 3}};
System.out.println(findRedundantDirectedConnection(edges));
}
int[] father;//parent array for union-find
int n;//number of nodes
@ -84,7 +92,7 @@ public class LC_685_FindRedundantConnection {
ArrayList<Integer> twoDegree = new ArrayList<>();
//find nodes with in-degree 2: one of their incoming edges must be removed; traverse in reverse because later edges have higher removal priority
for (int i = edges.length-1; i >=0; i--) {
for (int i = edges.length - 1; i >= 0; i--) {
if (inDegree[edges[i][1]] == 2) {
twoDegree.add(i);//this edge is a removal candidate
}

View File

@ -0,0 +1,109 @@
package com.markilue.leecode.hot100.interviewHot.union_find.second;
import org.junit.Test;
import java.util.ArrayList;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.hot100.interviewHot.union_find.second
*@Author: markilue
*@CreateTime: 2023-06-12 09:58
*@Description: TODO
*@Version: 1.0
*/
public class LC_685_FindRedundantConnection1 {
@Test
public void test() {
int[][] edges = {{1, 2}, {1, 3}, {2, 3}};
System.out.println(findRedundantDirectedConnection(edges));
}
int[] father;
private void init(int[] father) {
for (int i = 0; i < father.length; i++) {
father[i] = i;
}
}
private int find(int u) {
if (father[u] == u) return u;
father[u] = find(father[u]);
return father[u];
}
private void union(int u, int v) {
u = find(u);
v = find(v);
if (u == v) return;
father[v] = u;
}
private boolean same(int u, int v) {
u = find(u);
v = find(v);
return u == v;
}
private int[] removeOne(int[][] edges) {
init(father);
for (int[] edge : edges) {
if (same(edge[0], edge[1])) {
return edge;
}
union(edge[0], edge[1]);
}
return null;
}
private boolean removeIfCan(int[][] edges, int i) {
init(father);
//skip edge i
for (int i1 = 0; i1 < edges.length; i1++) {
if (i1 == i) continue;
if (same(edges[i1][0], edges[i1][1])) {
return false;
}
union(edges[i1][0], edges[i1][1]);
}
return true;
}
//although the graph is directed there are only three cases: the two cases with an in-degree-2 node can be decided directly, and the last case reduces to the undirected problem
public int[] findRedundantDirectedConnection(int[][] edges) {
father = new int[1010];
//check for a node with in-degree 2
int[] countDegree = new int[1010];
for (int[] edge : edges) {
countDegree[edge[1]]++;
}
ArrayList<Integer> twoDegree = new ArrayList<>();//indexes of edges whose target has in-degree 2
for (int i = edges.length - 1; i >= 0; i--) {
if (countDegree[edges[i][1]] > 1) twoDegree.add(i);
}
if (!twoDegree.isEmpty()) {
if (removeIfCan(edges, twoDegree.get(0))) {
return edges[twoDegree.get(0)];
} else {
return edges[twoDegree.get(1)];
}
}
//no in-degree-2 node: just remove the edge that closes the cycle
return removeOne(edges);
}
}

View File

@ -21,7 +21,7 @@ public class T34_75_SortColors {
@Test
public void test() {
int[] nums = {2, 0, 2, 1, 1, 0};
sortColors1(nums);
sortColors3(nums);
System.out.println(Arrays.toString(nums));
}
@ -99,4 +99,28 @@ public class T34_75_SortColors {
}
//third pass at this problem
public void sortColors3(int[] nums) {
int index0 = 0;
int index1 = 0;
for (int i = 0; i < nums.length; i++) {
if (nums[i] == 0) {
swap(nums, index0, i);
if (index0 < index1) {//a 1 was swapped out, swap it back
swap(nums, index1, i);
}
index1++;
index0++;
} else if (nums[i] == 1) {
swap(nums, index1, i);
index1++;
}
}
}
}

View File

@ -99,4 +99,22 @@ public class T49_124_MaxPathSum {
return Math.max(left, right) + root.val;
}
public int maxPathSum3(TreeNode root) {
findCurMax(root);
return maxSum;
}
//return the best downward path sum that includes this node; paths that skip it were already considered at its children
public int findCurMax(TreeNode node) {
if (node == null) {
return 0;
}
int left = Math.max(findCurMax(node.left), 0);
int right = Math.max(findCurMax(node.right), 0);
maxSum = Math.max(maxSum, left + right + node.val);
return Math.max(left, right) + node.val;
}
}

View File

@ -203,3 +203,106 @@ class LRUCache {
}
}
class LRUCache1 {
public static void main(String[] args) {
LRUCache1 lRUCache = new LRUCache1(2);
lRUCache.put(1, 1); // cache is {1=1}
lRUCache.put(2, 2); // cache is {1=1, 2=2}
System.out.println(lRUCache.get(1)); // returns 1
lRUCache.put(3, 3); // evicts key 2; cache is {1=1, 3=3}
System.out.println(lRUCache.get(2)); // returns -1 (not found)
lRUCache.put(4, 4); // evicts key 1; cache is {4=4, 3=3}
System.out.println(lRUCache.get(1)); // returns -1 (not found)
System.out.println(lRUCache.get(3)); // returns 3
System.out.println(lRUCache.get(4)); // returns 4
}
Map<Integer,Node> cache;
int capacity;
int size;
Node head;
Node tail;
public LRUCache1(int capacity){
this.capacity=capacity;
cache =new HashMap<>();
size=0;
head =new Node();
tail=new Node();
head.next=tail;
tail.pre=head;
}
public int get(int key) {
Node node = cache.get(key);
if(node==null){
return -1;
}else{
//move this node to the head
deleteNode(node);
removeToHead(node);
return node.value;
}
}
public void put(int key, int value) {
Node node = cache.get(key);
if(node==null){
//not present: add it
if(size==capacity){
//cache is full: evict the least recently used entry
cache.remove(tail.pre.key);
deleteNode(tail.pre);
size--;
}
Node newNode = new Node(key,value);
removeToHead(newNode);
cache.put(key,newNode);
size++;
}else{
//update the value and move this node to the head
node.value=value;
deleteNode(node);
removeToHead(node);
}
}
private void deleteNode(Node node){
node.pre.next=node.next;
node.next.pre=node.pre;
}
private void removeToHead(Node node){
node.next=head.next;
head.next.pre=node;
head.next=node;
node.pre=head;
}
class Node {
int key;
int value;
Node pre;
Node next;
public Node() {
}
public Node(int key, int value) {
this.key = key;
this.value = value;
}
}
}

View File

@ -20,7 +20,7 @@ public class T67_221_MaximalSquare {
{'1', '1', '1', '1', '1'},
{'1', '0', '0', '1', '0'}
};
System.out.println(maximalSquare1(matrix));
System.out.println(maximalSquare2(matrix));
}
@Test
@ -100,4 +100,34 @@ public class T67_221_MaximalSquare {
return result * result;
}
public int maximalSquare2(char[][] matrix) {
int m = matrix.length;
int n = matrix[0].length;
int[][] dp = new int[m][n];
int result = Integer.MIN_VALUE;
for (int i = 0; i < n; i++) {
dp[0][i] = matrix[0][i] == '1' ? 1 : 0;
result = Math.max(result,dp[0][i]);
}
for (int i = 0; i < m; i++) {
dp[i][0] = matrix[i][0] == '1' ? 1 : 0;
result = Math.max(result, dp[i][0]);
}
for (int i = 1; i < m; i++) {
for (int j = 1; j < n; j++) {
if (matrix[i][j] == '1') {
dp[i][j] = Math.min(Math.min(dp[i - 1][j], dp[i][j - 1]), dp[i - 1][j - 1]) + 1;
}
if (result < dp[i][j]) result = dp[i][j];
}
}
return result * result;
}
}

View File

@ -33,4 +33,22 @@ public class T69_234_IsPalindrome {
return false;
}
public boolean isPalindrome1(ListNode head) {
root = head;
return find(head);
}
public boolean find(ListNode node) {
if (node == null) {
return true;
}
if (find(node.next) && node.val == root.val) {
root = root.next;
return true;
}
return false;
}
}

View File

@ -19,7 +19,7 @@ public class T72_239_MaxSlidingWindow {
public void test() {
int[] nums = {1, 3, -1, -3, 5, 3, 6, 7};
int k = 3;
System.out.println(Arrays.toString(maxSlidingWindow1(nums, k)));
System.out.println(Arrays.toString(maxSlidingWindow2(nums, k)));
}
@Test
@ -110,4 +110,37 @@ public class T72_239_MaxSlidingWindow {
}
//maintain a monotonic deque: once a later value is at least as large as an earlier one, the earlier value can never be a window maximum and is useless
public int[] maxSlidingWindow2(int[] nums, int k) {
ArrayDeque<Integer> stack = new ArrayDeque<>();
int[] result = new int[nums.length - k + 1];
//build the first window
for (int i = 0; i < k; i++) {
while (!stack.isEmpty() && nums[stack.peekLast()] <= nums[i]) {
stack.pollLast();
}
stack.offerLast(i);
}
result[0] = nums[stack.peekFirst()];
for (int i = k; i < nums.length; i++) {
//first drop indices that have slid out of the window
while (!stack.isEmpty() && i - stack.peekFirst() >= k) {
stack.pollFirst();
}
//then pop smaller values and append the current index in its proper place
while (!stack.isEmpty() && nums[stack.peekLast()] <= nums[i]) {
stack.pollLast();
}
stack.offerLast(i);
result[i - k + 1] = stack.isEmpty() ? -1 : nums[stack.peekFirst()];
}
return result;
}
}

View File

@ -103,11 +103,11 @@ public class T79_301_RemoveInvalidParentheses {
}
//after the check, perform the removals; left and right cannot both be greater than 0 at the same time
List<String> result = new ArrayList<>();
remove(s, left, right, result,0);
remove(s, left, right, result, 0);
return result;
}
public void remove(String s, int left, int right, List<String> res,int start) {
public void remove(String s, int left, int right, List<String> res, int start) {
if (left == 0 && right == 0) {
if (isValid(s)) {
res.add(s);
@ -118,17 +118,62 @@ public class T79_301_RemoveInvalidParentheses {
//subsequent deletions must continue from the previous deletion position
for (int i = start; i < s.length(); i++) {
//skip duplicates
if(i>start&&s.charAt(i)==s.charAt(i-1))continue;
if (i > start && s.charAt(i) == s.charAt(i - 1)) continue;
//not enough characters left
if (left + right > s.length() - i) return;
if (left > 0 && s.charAt(i) == '(') {
//a left parenthesis can be removed
remove(s.substring(0, i) + s.substring(i + 1), left - 1, right, res,i);
remove(s.substring(0, i) + s.substring(i + 1), left - 1, right, res, i);
}
if (right > 0 && s.charAt(i) == ')') {
//a right parenthesis can be removed
remove(s.substring(0, i) + s.substring(i + 1), left, right - 1, res,i);
remove(s.substring(0, i) + s.substring(i + 1), left, right - 1, res, i);
}
}
}
//count which kind of parenthesis is in surplus and delete from that side
public List<String> removeInvalidParentheses2(String s) {
int left = 0;
int right = 0;
for (int i = 0; i < s.length(); i++) {
char cur = s.charAt(i);
if (cur == '(') {
left++;
} else if (cur == ')') {
if (left > 0) {
left--;
} else {
right++;
}
}
}
List<String> result = new ArrayList<>();
remove(left, right, s, result, 0);
return result;
}
public void remove(int left, int right, String s, List<String> result, int start) {
if (left == 0 && right == 0) {
if (isValid(s)) {
result.add(new String(s));
}
return;
}
for (int i = start; i < s.length(); i++) {
char cur = s.charAt(i);
if (i > start && cur == s.charAt(i - 1)) {
continue;
}
if (left > 0 && cur == '(') {
remove(left - 1, right, s.substring(0, i) + s.substring(i + 1), result, i);
}
if (right > 0 && cur == ')') {
remove(left, right - 1, s.substring(0, i) + s.substring(i + 1), result, i);
}
}
}

View File

@ -0,0 +1,41 @@
package com.markilue.leecode.interview.OPPO.T0411;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.OPPO.T0411
*@Author: markilue
*@CreateTime: 2023-06-14 10:41
*@Description: TODO
*@Version: 1.0
*/
public class Question1 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
String s = sc.next();
solve(s);
}
private static void solve(String s) {
int left = 0;
int right = 0;
int xiaochu = 0;
for (int i = 0; i < s.length(); i++) {
char cur = s.charAt(i);
if (cur == '(') {
left++;
} else if (cur == ')') {
if (left > 0) {
left--;
xiaochu++;
} else {
right++;
}
}
}
System.out.println(s.length() - xiaochu);
}
}

View File

@ -0,0 +1,47 @@
package com.markilue.leecode.interview.OPPO.T0411;
import java.util.Arrays;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.OPPO.T0411
*@Author: markilue
*@CreateTime: 2023-06-14 11:02
*@Description: TODO
*@Version: 1.0
*/
public class Question2 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int n = sc.nextInt();
solve(n);
}
private static void solve(int n) {
if (n % 2 == 0) {
if (n == 2) {
System.out.println(2);
return;
}
System.out.println(cal(n / 2) * 2 * 2 % mod);
} else {
System.out.println(cal(n / 2 + 1) * cal(n / 2) % mod);
}
}
static long[] memo = new long[(int) 1e5];
static long mod = (long) (1e9 + 7);
public static long cal(int n) {
if (memo[n] != 0) {
return memo[n];
} else if (n == 1) {
return 1;
}
memo[n] = n * cal(n - 1) % mod;
return memo[n];
}
}

View File

@ -0,0 +1,44 @@
package com.markilue.leecode.interview.baidu.T0410;
import java.util.Arrays;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.baidu.T0410
*@Author: markilue
*@CreateTime: 2023-06-14 11:32
*@Description: TODO
*@Version: 1.0
*/
public class Question1 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int n = sc.nextInt();
int k = sc.nextInt();
int[] nums = new int[n];
for (int i = 0; i < n; i++) {
nums[i] = sc.nextInt();
}
solve(nums, k);
}
//conjecture: the k-1 smallest numbers each form their own group and everything else goes into one group
private static void solve(int[] nums, int k) {
Arrays.sort(nums);
double result = 0;
//the first k-1 numbers are singleton groups
for (int i = 0; i < k - 1; i++) {
result += nums[i];
}
//the rest form a single group
double temp = 0;
for (int i = k - 1; i < nums.length; i++) {
temp += nums[i];
}
double avg = temp / (nums.length - k + 1);
System.out.println((result+avg));
}
}

View File

@ -0,0 +1,54 @@
package com.markilue.leecode.interview.huawei.T0412;
import java.util.Arrays;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.huawei.T0412
*@Author: markilue
*@CreateTime: 2023-06-12 11:38
*@Description:
* TODO Trading-system degradation policy:
* A core trading-system API is called by N upstream systems, with call volumes R = [R1, R2, ..., RN].
* Because the core trading cluster is failing, the system must be temporarily degraded: the core system can accept at most cnt calls in total.
* The degradation rule is:
* if sum(R1, R2, ..., RN) <= cnt, every call goes through normally and -1 is returned;
* if sum(R1, R2, ..., RN) > cnt, choose a threshold limit:
* any upstream system whose call volume exceeds limit is capped at limit,
* while systems below limit keep calling normally.
* Find the largest such limit (limit may be 0).
* Efficiency matters, so choose an efficient approach. (A small worked example follows this class.)
*@Version: 1.0
*/
public class Question1 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int[] nums = Arrays.stream(sc.nextLine().split(" ")).mapToInt(Integer::parseInt).toArray();
int threshold = Integer.parseInt(sc.nextLine());
solve(nums, threshold);
}
//binary-search for the largest feasible limit
private static void solve(int[] nums, int threshold) {
int max = Math.min(threshold,(int) 1e5);
int min = threshold / nums.length;
while (min < max) {
int mid = min + ((max - min + 1) >> 1);
if (check(nums, mid, threshold)) min = mid;
else max = mid - 1;
}
System.out.println(min);
}
private static boolean check(int[] nums, int max, int threshold) {
int result = 0;
for (int i = 0; i < nums.length; i++) {
if (nums[i] < max) result += nums[i];
else result += max;
}
return result <= threshold;
}
}
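A small worked example of the rule above, as a standalone Scala sketch with hypothetical numbers (not from the original task), checked by brute force rather than by the binary search in solve():
// R = [3, 1, 7, 4], cnt = 10: sum = 15 > cnt, so a limit is needed.
// capped(3) = 3 + 1 + 3 + 3 = 10 <= 10 while capped(4) = 3 + 1 + 4 + 4 = 12 > 10, so the answer is 3.
val r = Array(3, 1, 7, 4)
val cnt = 10
def capped(limit: Int): Int = r.map(x => math.min(x, limit)).sum
println((0 to r.max).filter(l => capped(l) <= cnt).max) // 3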

View File

@ -0,0 +1,72 @@
package com.markilue.leecode.interview.huawei.T0412;
import java.util.*;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.huawei.T0412
*@Author: markilue
*@CreateTime: 2023-06-13 11:24
*@Description:
* TODO Collect as much food as possible:
* The organizer designed a food-collecting game.
* The map consists of N cells; each cell has at most 2 portals, and a portal teleports the player to a specified other cell.
* Each cell is labelled with three numbers:
* (1) id: the cell's number, from 0 to N-1, unique per cell
* (2) parent-id: the cell from whose portal the player can reach the current cell (-1 means no cell teleports here; exactly one such cell exists on the map)
* (3) value: an integer in [-100, 100]; a positive value means the player gains that many units of food, a negative value means losing that many (the player may temporarily hold a negative amount), and 0 means no change. (A small worked example follows this class.)
*@Version: 1.0
*/
public class Question2 {
static int max = Integer.MIN_VALUE;
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int num = sc.nextInt();
List<List<Node>> edges = new ArrayList<>();
HashMap<Integer, Integer> map = new HashMap<>();//<id,value>
for (int i = 0; i < num; i++) {
edges.add(new ArrayList<>());
}
for (int i = 0; i < num; i++) {
int id = sc.nextInt();
int parentId = sc.nextInt();
int value = sc.nextInt();
map.put(id, value);
if (parentId != -1) {
edges.get(parentId).add(new Node(id, value));
}
}
for (int i = 0; i < num; i++) {
solve(edges, map, 0, i);
}
System.out.println(max);
}
public static void solve(List<List<Node>> edges, Map<Integer, Integer> map, int curValue, int curIndex) {
curValue += map.get(curIndex);
max = Math.max(max, curValue);
List<Node> children = edges.get(curIndex);
for (Node child : children) {
solve(edges, map, curValue, child.id);
}
}
static class Node {
int value;
int id;
public Node() {
}
public Node(int id, int value) {
this.id = id;
this.value = value;
}
}
}
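A tiny worked instance of the game as a standalone Scala sketch (hypothetical 3-cell map, not from any real input): cells (id, parent-id, value) = (0, -1, 5), (1, 0, -2), (2, 1, 4). Walking down from cell 0 gives running totals 5, 3, 7, and starting at cell 2 alone gives 4, so the best amount of food is 7:
case class Cell(value: Int, children: List[Int])
val cells = Map(0 -> Cell(5, List(1)), 1 -> Cell(-2, List(2)), 2 -> Cell(4, Nil))
def best(id: Int, acc: Int): Int = {
  val cur = acc + cells(id).value                        // running total after entering this cell
  (cur :: cells(id).children.map(c => best(c, cur))).max // either stop here or keep teleporting
}
println(cells.keys.map(id => best(id, 0)).max) // 7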

View File

@ -1,5 +1,6 @@
package com.markilue.leecode.interview.huawei.T0531;
import com.markilue.leecode.tree.TreeNode;
import org.junit.Test;
import java.util.Scanner;
@ -38,7 +39,7 @@ public class Question3 {
{-2, -3, 4},
};
// calculateMaxRectangleSum(2, 3, income);
calculate(2, 3, income);
calculate1(2, 3, income);
}
private static void calculateMaxRectangleSum(int m, int n, int[][] matrix) {
@ -104,4 +105,40 @@ public class Question3 {
}
//second attempt: we need the sum of an arbitrary sub-rectangle without knowing its size in advance, so use a 2-D prefix sum
private static void calculate1(int m, int n, int[][] matrix) {
int[][] prefix = new int[m + 1][n + 1];
//build the prefix-sum array
for (int i = 1; i < m + 1; i++) {
for (int j = 1; j < n + 1; j++) {
prefix[i][j] = prefix[i - 1][j] + prefix[i][j - 1] - prefix[i - 1][j - 1] + matrix[i - 1][j - 1];
}
}
//enumerate every sub-rectangle and track the maximum sum
int result = Integer.MIN_VALUE;
int edge = 0;
for (int i = 1; i < m + 1; i++) {
for (int j = 1; j < n + 1; j++) {//top-left corner
for (int k = i; k < m + 1; k++) {
for (int l = j; l < n + 1; l++) {//bottom-right corner
int cur = prefix[k][l] - prefix[i - 1][l] - prefix[k][j - 1] + prefix[i - 1][j - 1];
if (cur > result) {
result = cur;
edge = (k - i + 1) * (l - j + 1);
}
}
}
}
}
System.out.println(edge + " " + result);
}
}

View File

@ -0,0 +1,90 @@
package com.markilue.leecode.interview.meituan.T0415;
import java.util.Arrays;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
*@Author: markilue
*@CreateTime: 2023-06-09 18:41
*@Description: TODO
*@Version: 1.0
*/
public class NestingDolls {
static int n;
static Doll[] dolls;
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
n = in.nextInt();
dolls = new Doll[n];
for (int i = 0; i < n; ++i) {
int a = in.nextInt(), b = in.nextInt(), c = in.nextInt();
dolls[i] = new Doll(a, b, c);
}
Arrays.sort(dolls);//sort by size a, ascending
int minCost = 0;
for (int i = 0; i < n; ++i) {
if (!dolls[i].used) {
int cost = insertDoll(i, dolls[i].b);
minCost += cost;
}
}
System.out.println(minCost);
in.close();
}
// place doll i into a doll whose free inner space is size
static int insertDoll(int i, int size) {
dolls[i].used = true;
int j = findSmallest(size);
if (j == -1) {//nothing fits inside, so the space has to stay empty
dolls[i].minSize = size;
dolls[i].minCost = dolls[i].c * size;//nothing fits, so the cost covers the whole space
return dolls[i].minCost;
} else {
int cost = insertDoll(j, dolls[i].a) + dolls[i].c * (size - dolls[j].a);
if (cost < dolls[i].minCost) {
dolls[i].minSize = size- dolls[j].a;
dolls[i].minCost = cost;
}
return cost;
}
}
// find, among the unused dolls that fit into size, the one occupying the smallest inner space
static int findSmallest(int size) {
int j = -1;
for (int i = 0; i < n; ++i) {
if (!dolls[i].used && dolls[i].a <= size)
if (j == -1 || dolls[i].minSize < dolls[j].minSize) j = i;
}
return j;
}
static class Doll implements Comparable<Doll> {
int a;
int b;
int c;
boolean used = false; // whether this doll has already been placed
int minSize; // smallest inner space it ends up occupying
int minCost; // minimal cost
Doll(int a, int b, int c) {
this.a = a;
this.b = b;
this.c = c;
}
@Override
public int compareTo(Doll other) {
return Integer.compare(a, other.a);
}
}
}

View File

@ -0,0 +1,51 @@
package com.markilue.leecode.interview.meituan.T0415;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
*@Author: markilue
*@CreateTime: 2023-06-06 11:38
*@Description:
* TODO String prefix:
* Given two strings S and T, perform operations on S until S is a prefix of T (the empty string counts as a prefix).
* Each operation either modifies one character of S or deletes the last character of S; output the minimum number of operations. (A worked example follows this class.)
*
*@Version: 1.0
*/
public class Question1 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int count = sc.nextInt();
for (int i = 0; i < count; i++) {
String S = sc.next();
String T = sc.next();
solve(S, T);
}
}
//why not dynamic programming: deletions are only allowed at the end of S
private static void solve(String s, String t) {
int result = 0;
int pos = s.length() - 1;
//for S to be a prefix of T, S must not be longer than T
if (s.length() > t.length()) {
result += s.length() - t.length();
pos = t.length() - 1;
}
//modify when possible; delete only when modification cannot help
for (int i = pos; i >= 0; i--) {
if (t.charAt(i) != s.charAt(i)) {
result++;
}
}
System.out.println(result);
}
}
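A small worked example as a standalone Scala sketch (hypothetical strings, not from the original test data): for S = "abcde" and T = "abq", the two characters sticking out past T must be deleted and S(2) = 'c' must be modified to 'q', so 3 operations are needed:
def ops(s: String, t: String): Int = {
  val deletions = math.max(0, s.length - t.length)    // trailing characters of S that must be deleted
  val keep = math.min(s.length, t.length)
  deletions + (0 until keep).count(i => s(i) != t(i)) // mismatches fixed by modification
}
println(ops("abcde", "abq")) // 3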

View File

@ -0,0 +1,49 @@
package com.markilue.leecode.interview.meituan.T0415;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
*@Author: markilue
*@CreateTime: 2023-06-07 10:28
*@Description:
* TODO Xiaomei splits the candy:
* One day Xiaomei buys two kinds of candy, a pieces of one and b pieces of the other, to share among the n children in her class; to avoid waste, every piece must go to exactly one child.
* The two kinds do not taste good together, so each child may receive only one of the two kinds.
* Xiaomei wants the child who receives the fewest candies to get as many as possible; compute that number. (A worked example follows this class.)
*@Version: 1.0
*/
public class Question2 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int count = sc.nextInt();
for (int i = 0; i < count; i++) {
int n = sc.nextInt();
int a = sc.nextInt();
int b = sc.nextInt();
solve(n, a, b);
}
}
//try every split of the children between the two kinds and keep the largest minimum
private static void solve(int n, int a, int b) {
if (a > b) {
solve(n, b, a);
return;
}
int min = Integer.MIN_VALUE;
for (int i = 1; i < n; i++) {
int curMin = Math.min(a / i, b / (n - i));
if (min > curMin) {
break;//the minimum has started to decrease, stop here
} else {
min = curMin;
}
}
System.out.println(min);
}
}
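A small worked example as a standalone Scala sketch (hypothetical values): with n = 3, a = 5, b = 7, giving one child the first kind yields min(5/1, 7/2) = 3 and giving two children the first kind yields min(5/2, 7/1) = 2, so the answer is 3. A brute-force check over every split:
val (n, a, b) = (3, 5, 7)
println((1 until n).map(i => math.min(a / i, b / (n - i))).max) // 3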

View File

@ -0,0 +1,142 @@
package com.markilue.leecode.interview.meituan.T0415;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
*@Author: markilue
*@CreateTime: 2023-06-07 11:00
*@Description:
* TODO Traffic planning:
* Country A has n cities standing in a row, numbered 1, 2, 3, ..., n.
* Initially there are no routes between them, so the government plans to build some railways.
* Over the next T days, one of the following operations happens each day:
* - L x: build a railway between city x and the city to its left (ignored if there is no city to the left or the railway already exists)
* - R x: build a railway between city x and the city to its right (ignored if there is no city to the right or the railway already exists)
* - Q x: query the farthest city numbers reachable from x going left and going right
* Your task is to simulate the operations and output the answer for every Q x. (A small worked session follows this class.)
*@Version: 1.0
*/
public class Question3 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int total = sc.nextInt();
//build the graph
List<Node> edge = new ArrayList<>();
for (int i = 0; i < total; i++) {
edge.add(new Node(i));
}
int count = sc.nextInt();
for (int i = 0; i < count; i++) {
String action = sc.next();
int node = sc.nextInt() - 1;//num => index
if (action.equals("L") && node - 1 >= 0) {
edge.get(node).left = edge.get(node - 1);
} else if (action.equals("R") && node + 1 < edge.size()) {
edge.get(node).right = edge.get(node + 1);
} else if (action.equals("Q")) {
//query
Node cur = edge.get(node);
Node tempLeft = cur;
while (tempLeft.left != null) {
tempLeft = tempLeft.left;
}
Node tempRight = cur;
while (tempRight.right != null) {
tempRight = tempRight.right;
}
System.out.println((tempLeft.val + 1) + " " + (tempRight.val + 1));
}
}
}
static class Node {
Node left;
Node right;
int val;
public Node() {
}
public Node(int val) {
this.val = val;
}
}
int[] father;
public void init(int[] father) {
for (int i = 0; i < father.length; i++) {
father[i] = i;
}
}
public int find(int x) {
if (x == father[x]) return x;
father[x] = find(father[x]);
return father[x];
}
public void union(int u, int v) {
u = find(u);
v = find(v);
if (u == v) return;
father[v] = u;
}
//Union-find: this is really an undirected connectivity problem, so a disjoint-set union simplifies the computation
public void solve1() {
Scanner sc = new Scanner(System.in);
int total = sc.nextInt();
//build the disjoint-set structure
father = new int[total + 2];
init(father);
int count = sc.nextInt();
for (int i = 0; i < count; i++) {
String action = sc.next();
int node = sc.nextInt();//num => index
if (action.equals("L")) {
union(node, node - 1);
} else if (action.equals("R")) {
union(node, node + 1);
} else {
//query: binary-search on the left for the smallest connected city
int l = 1;
int r = node;
while (l < r) {
int mid = l + ((r - l) >> 1);
if (find(node) == find(mid)) r = mid;//same root means connected, so shrink the range toward smaller numbers
else l = mid + 1;
}
int res1 = r;
//query: binary-search on the right for the largest connected city
l = node;
r = total;
while (l < r) {
int mid = l + ((r - l + 1) >> 1);//round the midpoint up so that l = mid always makes progress
if (find(node) == find(mid)) l = mid;//same root means connected, so expand the range toward larger numbers
else r = mid - 1;
}
System.out.println(res1 + " " + r);
}
}
}
}
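A small worked session (hypothetical, n = 5 cities): after "R 2" (a rail between 2 and 3) and "L 2" (a rail between 1 and 2), the query "Q 2" should print "1 3". A simplified array-based Scala sketch of that behaviour, independent of the Node and union-find code above:
val n = 5
val railRight = Array.fill(n + 1)(false) // railRight(x): a rail exists between city x and city x + 1
railRight(2) = true                      // "R 2"
railRight(1) = true                      // "L 2" is the same rail as "R 1"
def query(x: Int): (Int, Int) = {
  var l = x; while (l > 1 && railRight(l - 1)) l -= 1 // walk left while rails continue
  var r = x; while (r < n && railRight(r)) r += 1     // walk right while rails continue
  (l, r)
}
println(query(2)) // (1,3)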

View File

@ -0,0 +1,92 @@
package com.markilue.leecode.interview.meituan.T0415;
import org.junit.Test;
import java.util.*;
/**
*@BelongsProject: Leecode
*@BelongsPackage: com.markilue.leecode.interview.meituan.T0415
*@Author: markilue
*@CreateTime: 2023-06-09 13:11
*@Description:
* TODO Xiaomei plays with nesting dolls:
* Xiaomei has recently taken to nesting dolls.
* Concretely, she has n dolls; doll i has size ai and inner space bi (bi < ai).
* Doll x fits inside doll y iff ax <= by; once placed, it occupies ax of y's inner space, leaving by - ax.
* Each doll may be placed inside at most one other doll, and each doll may directly contain at most one doll (which may itself contain another, and so on).
* Nesting as much as possible is clearly best, so Xiaomei assigns each doll a value ci:
* if doll i ends up with k units of unused inner space, it costs ci * k; the total cost is the sum over all dolls. Find the minimum total cost. (A brute-force check of the test-case values follows this class.)
*@Version: 1.0
*/
public class Question4 {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int n = sc.nextInt();
int[] out = new int[n];
int[] in = new int[n];
int[] payment = new int[n];
for (int i = 0; i < n; i++) {
out[i] = sc.nextInt();
}
for (int i = 0; i < n; i++) {
in[i] = sc.nextInt();
}
for (int i = 0; i < n; i++) {
payment[i] = sc.nextInt();
}
solve(out, in, payment);
}
@Test
public void test() {
int[] out = {5, 4, 3};
int[] in = {4, 2, 2};
int[] payment = {3, 2, 1};
solve(out, in, payment);
}
//official idea (possibly wrong) - greedy: fill the dolls with the highest cost first
public static void solve(int[] out, int[] in, int[] payment) {
//build the doll lists to be sorted below
List<int[]> taoS1 = new ArrayList<>();
List<int[]> taoS2 = new ArrayList<>();
for (int i = 0; i < out.length; i++) {
taoS1.add(new int[]{out[i], in[i], payment[i], i});
taoS2.add(new int[]{out[i], in[i], payment[i], i});
}
Collections.sort(taoS1, ((o1, o2) -> o1[0] - o2[0]));//sorted by outer size, ascending
Collections.sort(taoS2, ((o1, o2) -> o1[2] - o2[2]));//sorted by cost, ascending
//go from the most expensive doll down and binary-search for the largest unused doll that fits inside it
int n = out.length;
int rightThreshold = n - 1;
for (int i = n - 1; i >= 0; i--) {
int left = 0;
int right = rightThreshold;
int mid = (left + right + 1) >> 1;//round the midpoint up
while (left < right) {
mid = (left + right + 1) >> 1;
if (taoS2.get(i)[1] >= taoS1.get(mid)[0]) left = mid;//the doll at mid fits, try an even larger one
else right = mid - 1;//the doll at mid does not fit, so no larger doll will
}
if (taoS1.get(mid)[3] == taoS2.get(i)[3]) right--;//it matched itself, which is not allowed
if (taoS2.get(i)[1] < taoS1.get(right)[0]) break;//nothing can be placed into this doll any more
taoS2.get(i)[1] -= taoS1.get(right)[0];
rightThreshold = right - 1;
}
int result = 0;
for (int[] total : taoS2) {
result += total[1] * total[2];
}
System.out.println(result);
}
}
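A brute-force cross-check of the values used in test() above (out = {5, 4, 3}, in = {4, 2, 2}, cost = {3, 2, 1}), as a standalone Scala sketch. Only doll 1 (inner space 4) can hold another doll, so the only candidate nestings are doll 2 or doll 3 inside doll 1:
val candidates = Seq(
  0 * 3 + 2 * 2 + 2 * 1, // doll 2 (size 4) inside doll 1: leftovers 0, 2, 2 -> cost 6
  1 * 3 + 2 * 2 + 2 * 1, // doll 3 (size 3) inside doll 1: leftovers 1, 2, 2 -> cost 9
  4 * 3 + 2 * 2 + 2 * 1  // no nesting at all: leftovers 4, 2, 2 -> cost 18
)
println(candidates.min) // minimum total cost for this instance is 6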

View File

@ -1,38 +1,35 @@
package com.markilue.leecode.test;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Scanner;
public class Fibonaqi {
/**
* Test the O(n) recursive Fibonacci implementation
*
*/
// @Test
// public static void testFibonaqi(){
//
// }
public static void main(String[] args) {
int n = 5;
System.out.println(fibonacci(1, 1, 10));
}
public static int fibonacci(int first, int second, int n) {
if (n <= 0) {
return 0;
}
if (n < 3) {
return 1;
} else if (n == 3) {
return first + second;
} else {
return fibonacci(second, first + second, n - 1);
}
}
}

View File

@ -0,0 +1,5 @@
This folder contains:
1. Examples from studying web scraping, with hands-on cases: https://github.com/Python3WebSpider
2. Some real websites that were scraped, etc.
Scraping practice hub: https://setup.scrape.center/

View File

@ -0,0 +1,244 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 16:08
@Usage :
@Desc : Reference: https://github.com/Python3WebSpider/BeautifulSoupTest
'''
html = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
from bs4 import BeautifulSoup
def baseUse():
soup = BeautifulSoup(html, 'lxml')
print(soup.title) # <title>The Dormouse's story</title>
print(type(soup.title)) # <class 'bs4.element.Tag'>
print(soup.title.string) # The Dormouse's story
print(soup.head) # <head><title>The Dormouse's story</title></head>
print(soup.p) # <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
print(soup.p.name) # node name: p
print(soup.p.attrs) # all attributes: {'class': ['title'], 'name': 'dromouse'}
print(soup.p.attrs['name']) # attribute value: dromouse
print(soup.p['name']) # attribute value: dromouse
print(soup.body.p['name']) # nested selection: dromouse
print("==========================")
def child():
html = """
<html>
<head>
<title>The Dormouse's story</title>
</head>
<body>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">
<span>Elsie</span>
</a>
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
and they lived at the bottom of a well.
</p>
<p class="story">...</p>
"""
soup = BeautifulSoup(html, 'lxml')
# Direct children
for i, child in enumerate(soup.p.children):
print(i, child)
print("===============================")
# All descendants
for i, child in enumerate(soup.p.descendants):
print(i, child)
print("===============================")
def parent():
soup = BeautifulSoup(html, 'lxml')
# Parent node
print(soup.a.parent)
print("===============================")
# Ancestor nodes
print(type(soup.a.parents))
print(list(enumerate(soup.a.parents)))
print("=============================")
def brother():
html = """
<html>
<body>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">
<span>Elsie</span>
</a>
Hello
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
and they lived at the bottom of a well.
</p>
"""
# Sibling nodes
soup = BeautifulSoup(html, 'lxml')
print('Next Sibling', soup.a.next_sibling)
print('Prev Sibling', soup.a.previous_sibling)
print('Next Siblings', list(enumerate(soup.a.next_siblings)))
print('Prev Siblings', list(enumerate(soup.a.previous_siblings)))
# find_all: return every node that matches
def findAll():
html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
soup = BeautifulSoup(html, 'lxml')
print(soup.find_all(name='ul'))
print(type(soup.find_all(name='ul')[0]))
for ul in soup.find_all(name='ul'):
print(ul.find_all(name='li'))
for ul in soup.find_all(name='ul'):
print(ul.find_all(name='li'))
for li in ul.find_all(name='li'):
print(li.string)
# Match nodes by their attributes
def attrs():
html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1" name="elements">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
soup = BeautifulSoup(html, 'lxml')
print(soup.find_all(attrs={'id': 'list-1'}))
print(soup.find_all(attrs={'name': 'elements'}))
# Common attributes can be passed directly, without attrs
soup = BeautifulSoup(html, 'lxml')
print(soup.find_all(id='list-1'))
print(soup.find_all(class_='element'))
import re
print(soup.find_all(string=re.compile('Foo')))  # string is equivalent to text, i.e. the node's inner content
# find: return only the first matching element
def find():
html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
soup = BeautifulSoup(html, 'lxml')
print(soup.find(name='ul'))
print(type(soup.find(name='ul')))
print(soup.find(class_='list'))
# CSS selectors
def cssSelect():
html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
soup = BeautifulSoup(html, 'lxml')
print(soup.select('.panel .panel-heading'))
print(soup.select('ul li'))
print(soup.select('#list-2 .element'))
print(type(soup.select('ul')[0]))
# Nested selection
soup = BeautifulSoup(html, 'lxml')
for ul in soup.select('ul'):
print(ul.select('li'))
# Get attributes
soup = BeautifulSoup(html, 'lxml')
for ul in soup.select('ul'):
print(ul['id'])
print(ul.attrs['id'])
# Get text
soup = BeautifulSoup(html, 'lxml')
for li in soup.select('li'):
print('Get Text:', li.get_text())
print('String:', li.string)
if __name__ == '__main__':
cssSelect()

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 16:07
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 16:54
@Usage :
@Desc :
'''

View File

@ -0,0 +1,329 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 16:54
@Usage :
@Desc : PyQuery study notes. Reference: https://github.com/Python3WebSpider/PyQueryTest
'''
from pyquery import PyQuery as pq
# Initialize from a string
def stringBase():
html = '''
<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
doc = pq(html)
print(doc('li'))
# Initialize from a URL
def URLBase():
doc = pq(url='https://cuiqingcai.com')
print(doc('title'))
# The code above is equivalent to the following
# doc = pq(requests.get('https://cuiqingcai.com').text)
# print(doc('title'))
# Initialize from a local file
def fileBase():
doc = pq(filename='demo.html')
print(doc('li'))
# Basic CSS selectors
def cssSelect():
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
doc = pq(html)
print(doc('#container .list li'))
print(type(doc('#container .list li')))
#
for item in doc('#container .list li').items():
print(item.text())
# Find child nodes
def child():
html = '''
<div>
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
doc = pq(html)
items = doc('.list')
print(type(items))
print(items)
lis = items.find('li')
print(type(lis))
print(lis)
#
#
lis = items.children()
print(type(lis))
print(lis)
#
lis = items.children('.active')
print(lis)
def parent():
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
container = items.parent()
print(type(container))
print(container)
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
parents = items.parents()
print(type(parents))
print(parents)
parent = items.parents('.wrap')
print(parent)
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list .item-0.active')
print(li.siblings())
def brother():
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list .item-0.active')
print(li.siblings('.active'))
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
print(str(li))
from pyquery import PyQuery as pq
doc = pq(html)
# There may be multiple matched nodes
lis = doc('li').items()
print(type(lis))
for li in lis:
print(li, type(li))
def attrs():
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
a = doc('.item-0.active a')
print(a, type(a))
print(a.attr('href'))
a = doc('a')
print(a, type(a))
print(a.attr('href'))
print(a.attr.href)
from pyquery import PyQuery as pq
doc = pq(html)
a = doc('a')
for item in a.items():
# Get attributes and text
print(item.attr('href'),item.text())
def getHTML():
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('li')
print(li.html()) # HTML of the first matched node: <a href="link2.html">second item</a>
print(li.text()) # text of all matched nodes: second item third item fourth item fifth item
print(type(li.text()))
# Add or remove a node's class
def operateNode():
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
li.removeClass('active')
print(li)
li.addClass('active')
print(li)
'''
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
'''
def operateNodeInformation():
html = '''
<ul class="list">
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
</ul>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
li.attr('name', 'link')
print(li)
li.text('changed item')
print(li)
li.html('<span>changed item</span>')
print(li)
'''
<li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active" name="link">changed item</li>
<li class="item-0 active" name="link"><span>changed item</span></li>
'''
def removeInformation():
html = '''
<div class="wrap">
Hello, World
<p>This is a paragraph.</p>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
wrap = doc('.wrap')
print(wrap.text())
'''
Hello, World
This is a paragraph.
'''
wrap.find('p').remove()
print(wrap.text())
'''
Hello, World
'''
# Pseudo-class selectors
def fakeCSSSelect():
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('li:first-child')
print(li)
li = doc('li:last-child')
print(li)
li = doc('li:nth-child(2)')
print(li)
li = doc('li:gt(2)')
print(li)
li = doc('li:nth-child(2n)')
print(li)
li = doc('li:contains(second)')
print(li)
if __name__ == '__main__':
fakeCSSSelect()

View File

@ -0,0 +1,195 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 15:15
@Usage :
@Desc :
'''
from lxml import etree
'''
Basic XPath rules:
1) nodename: selects all child nodes of the named node
2) /: selects direct children of the current node
3) //: selects descendants of the current node
4) .: selects the current node
5) ..: selects the parent of the current node
6) @: selects an attribute
Example:
//title[@lang='eng'] selects every title node whose lang attribute equals eng
'''
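# A small demo of the rules listed above (my addition, not part of the referenced repo);
# the markup is made up purely for illustration.
def ruleExamples():
    text = '''
    <div>
    <p lang="eng">Hello</p>
    <p lang="fra">Bonjour</p>
    </div>
    '''
    html = etree.HTML(text)
    # // plus @: every p node whose lang attribute equals eng
    print(html.xpath('//p[@lang="eng"]/text()'))  # ['Hello']
    # ..: step from the matched p back up to its parent div
    print(html.xpath('//p[@lang="eng"]/..'))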
def htmlByString():
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
html = etree.HTML(text)
result = etree.tostring(html)
print(result.decode('utf-8'))
def htmlByFile():
html = etree.parse('./test.html', etree.HTMLParser())
result = etree.tostring(html)
print(result.decode('utf-8'))
def allNode():
html = etree.parse('./test.html', etree.HTMLParser())
# Match every node in the document
result = html.xpath('//*')
print(result)
print(result[0])
# Match all li nodes
result = html.xpath('//li')
print(result)
print(result[0])
# Matching child nodes
def childNode():
html = etree.parse('./test.html', etree.HTMLParser())
# Match the direct a children of every li
result = html.xpath('//li/a')
print(result)
print(result[0])
# Match a nodes that are descendants of ul at any depth
result = html.xpath('//ul//a')
print(result)
print(result[0])
# Matching parent nodes
def fatherNode():
html = etree.parse('./test.html', etree.HTMLParser())
# Get the class attribute of the parent of the a node whose href is link4.html
result = html.xpath('//a[@href="link4.html"]/../@class')
print(result)
# The same can be done with the parent:: axis
result = html.xpath('//a[@href="link4.html"]/parent::*/@class')
print(result)
# Getting text
def textGet():
html = etree.parse('./test.html', etree.HTMLParser())
# Text of the a children of li nodes whose class is item-0
result = html.xpath('//li[@class="item-0"]/a/text()')
print(result) # ['first item', 'fifth item']
# Text of all descendants of li nodes whose class is item-0
result = html.xpath('//li[@class="item-0"]//text()')
print(result) # ['first item', 'fifth item', '\r\n ']
# Getting attributes
def fieldGet():
html = etree.parse('./test.html', etree.HTMLParser())
# href attribute of the a children of every li node
result = html.xpath('//li/a/@href')
print(result) # ['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
# Matching when an attribute has multiple values
def fieldsGet():
text = '''
<li class="li li-first"><a href="link.html">first item</a></li>
'''
html = etree.HTML(text)
result = html.xpath('//li[@class="li"]/a/text()')
print(result) # [] - exact match fails because class has two values
result = html.xpath('//li[contains(@class, "li")]/a/text()')
print(result) # ['first item'] - contains() matches
# Matching on multiple attributes
def fieldssGet():
text = '''
<li class="li li-first" name="item"><a href="link.html">first item</a></li>
'''
html = etree.HTML(text)
# Combine multiple attribute conditions with and
result = html.xpath('//li[contains(@class, "li") and @name="item"]/a/text()')
print(result)
# Selecting by position
def orderGet():
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
html = etree.HTML(text)
result = html.xpath('//li[1]/a/text()')
print(result) # ['first item']
result = html.xpath('//li[last()]/a/text()')
print(result) # ['fifth item']
result = html.xpath('//li[position()<3]/a/text()')
print(result) # ['first item', 'second item']
result = html.xpath('//li[last()-2]/a/text()')
print(result) # ['third item']
def nodeSelect():
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html"><span>first item</span></a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
html = etree.HTML(text)
result = html.xpath('//li[1]/ancestor::*')
print(result)
# ancestor: get ancestor nodes
result = html.xpath('//li[1]/ancestor::div')
print(result)
# attribute: get all attributes
result = html.xpath('//li[1]/attribute::*')
print(result)
# child: get child nodes
result = html.xpath('//li[1]/child::a[@href="link1.html"]')
print(result)
# descendant: get descendant nodes
result = html.xpath('//li[1]/descendant::span')
print(result)
# following: get all nodes after the current node
result = html.xpath('//li[1]/following::*[2]')
print(result)
# following-sibling: get the subsequent siblings of the current node
result = html.xpath('//li[1]/following-sibling::*')
print(result)
if __name__ == '__main__':
nodeSelect()

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 15:15
@Usage :
@Desc :
'''

View File

@ -0,0 +1,9 @@
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/8 15:12
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 14:03
@Usage :
@Desc :
'''

View File

@ -0,0 +1,50 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 14:39
@Usage :
@Desc : Saving data as JSON
'''
import json
str = '''
[{
"name": "Bob",
"gender": "male",
"birthday": "1992-10-18"
}, {
"name": "Selina",
"gender": "female",
"birthday": "1995-10-18"
}]
'''
print(type(str))
data = json.loads(str)
print(data)
print(type(data))
import json
data = [{
'name': 'Bob',
'gender': 'male',
'birthday': '1992-10-18'
}]
with open('data.json', 'w', encoding='utf-8') as file:
file.write(json.dumps(data))
with open('data.json', 'w', encoding='utf-8') as file:
# indent adds indentation to the output
file.write(json.dumps(data, indent=2))
data = [{
'name': '张三',
'gender': 'male',
'birthday': '1992-10-18'
}]
with open('data.json', 'w', encoding='utf-8') as file:
# indent adds indentation; ensure_ascii=False keeps non-ASCII characters (e.g. Chinese) readable
file.write(json.dumps(data, indent=2, ensure_ascii=False))
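# A short read-back sketch (my addition): json.load restores the structure written above,
# so the round trip can be checked.
with open('data.json', encoding='utf-8') as file:
    loaded = json.load(file)
    print(loaded, type(loaded))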

View File

@ -0,0 +1,33 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 15:03
@Usage :
@Desc :
'''
import pymysql
data = {
'id': '20120001',
'name': 'Bob',
'age': 20
}
# Build the INSERT statement dynamically from the dict keys
table = 'students'
keys = ', '.join(data.keys())
values = ', '.join(['%s'] * len(data))
db = pymysql.connect(host='localhost', user='root',
password=None, port=3306, db='spiders')
cursor = db.cursor()
sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(
table=table, keys=keys, values=values)
try:
if cursor.execute(sql, tuple(data.values())):
print('Successful')
db.commit()
except Exception as e:
print('Failed', e)
db.rollback()
db.close()
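# A hedged sketch (my addition, reusing the same table, columns and connection settings as above):
# MySQL's ON DUPLICATE KEY UPDATE turns the insert into an upsert, assuming id is the primary key.
def insert_or_update(record):
    keys = ', '.join(record.keys())
    values = ', '.join(['%s'] * len(record))
    update = ', '.join(['{key} = %s'.format(key=key) for key in record])
    sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE {update}'.format(
        table=table, keys=keys, values=values, update=update)
    conn = pymysql.connect(host='localhost', user='root',
                           password=None, port=3306, db='spiders')
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, tuple(record.values()) * 2)
            conn.commit()
        print('Upsert successful')
    except Exception as e:
        print('Upsert failed', e)
        conn.rollback()
    finally:
        conn.close()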

View File

@ -0,0 +1,38 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 14:08
@Usage :
@Desc : Saving data as plain text
'''
import requests
from pyquery import PyQuery as pq
import re
url = 'https://ssr1.scrape.center/'
html = requests.get(url).text
doc = pq(html)
items = doc('.el-card').items()
file = open('movies.txt', 'w', encoding='utf-8')
for item in items:
# name
name = item.find('a > h2').text()
file.write(f'名称: {name}\n')
# categories
categories = [item.text() for item in item.find('.categories button span').items()]
file.write(f'类别: {categories}\n')
# release date
published_at = item.find('.info:contains(上映)').text()
published_at = re.search(r'(\d{4}-\d{2}-\d{2})', published_at).group(1) \
if published_at and re.search(r'\d{4}-\d{2}-\d{2}', published_at) else None
file.write(f'上映时间: {published_at}\n')
# score
score = item.find('p.score').text()
file.write(f'评分: {score}\n')
file.write(f'{"=" * 50}\n')
file.close()

View File

@ -0,0 +1,67 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:01
@Usage :
@Desc :
'''
import requests
import logging
import json
from os import makedirs
from os.path import exists
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
INDEX_URL = 'https://spa1.scrape.center/api/movie/?limit={limit}&offset={offset}'
DETAIL_URL = 'https://spa1.scrape.center/api/movie/{id}'
LIMIT = 10
TOTAL_PAGE = 10
RESULTS_DIR = 'results'
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
def scrape_api(url):
logging.info('scraping %s...', url)
try:
response = requests.get(url)
if response.status_code == 200:
return response.json()
logging.error('get invalid status code %s while scraping %s',
response.status_code, url)
except requests.RequestException:
logging.error('error occurred while scraping %s', url, exc_info=True)
def scrape_index(page):
url = INDEX_URL.format(limit=LIMIT, offset=LIMIT * (page - 1))
return scrape_api(url)
def scrape_detail(id):
url = DETAIL_URL.format(id=id)
return scrape_api(url)
def save_data(data):
name = data.get('name')
data_path = f'{RESULTS_DIR}/{name}.json'
json.dump(data, open(data_path, 'w', encoding='utf-8'),
ensure_ascii=False, indent=2)
def main():
for page in range(1, TOTAL_PAGE + 1):
index_data = scrape_index(page)
for item in index_data.get('results'):
id = item.get('id')
detail_data = scrape_detail(id)
logging.info('detail data %s', detail_data)
save_data(detail_data)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 15:58
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:19
@Usage :
@Desc :
'''

View File

@ -0,0 +1,28 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Basic usage of the aiohttp library
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo12
'''
import aiohttp
import asyncio
async def fetch(session, url):
async with session.get(url) as response:
return await response.text(), response.status
async def main():
async with aiohttp.ClientSession() as session:
html, status = await fetch(session, 'https://cuiqingcai.com')
print(f'html: {html[:100]}...')
print(f'status: {status}')
if __name__ == '__main__':
asyncio.run(main())

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 17:02
@Usage :
@Desc :
'''

View File

@ -0,0 +1,86 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:14
@Usage :
@Desc :
'''
import asyncio
import aiohttp
import logging
from motor.motor_asyncio import AsyncIOMotorClient
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
INDEX_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
DETAIL_URL = 'https://spa5.scrape.center/api/book/{id}'
PAGE_SIZE = 18
PAGE_NUMBER = 1
CONCURRENCY = 5
session = None
MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'books'
MONGO_COLLECTION_NAME = 'books'
client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
db = client[MONGO_DB_NAME]
collection = db[MONGO_COLLECTION_NAME]
semaphore = asyncio.Semaphore(CONCURRENCY)
async def scrape_api(url):
async with semaphore:
try:
logging.info('scraping %s', url)
async with session.get(url) as response:
return await response.json()
except aiohttp.ClientError:
logging.error('error occurred while scraping %s', url, exc_info=True)
async def scrape_index(page):
url = INDEX_URL.format(offset=PAGE_SIZE * (page - 1))
return await scrape_api(url)
async def scrape_detail(id):
url = DETAIL_URL.format(id=id)
data = await scrape_api(url)
await save_data(data)
async def save_data(data):
logging.info('saving data %s', data)
if data:
return await collection.update_one({
'id': data.get('id')
}, {
'$set': data
}, upsert=True)
async def main():
# index tasks
global session
session = aiohttp.ClientSession()
scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
results = await asyncio.gather(*scrape_index_tasks)
# detail tasks
print('results', results)
ids = []
for index_data in results:
if not index_data: continue
for item in index_data.get('results'):
ids.append(item.get('id'))
scrape_detail_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
await asyncio.wait(scrape_detail_tasks)
await session.close()
if __name__ == '__main__':
asyncio.run(main())

View File

@ -0,0 +1,64 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Usage of the aiohttp library
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo12
'''
import aiohttp
import asyncio
async def fetch(session, url):
async with session.get(url) as response:
return await response.text(), response.status
async def main():
async with aiohttp.ClientSession() as session:
html, status = await fetch(session, 'https://cuiqingcai.com')
print(f'html: {html[:100]}...')
print(f'status: {status}')
# Passing URL parameters
async def main1():
params = {'name': 'germey', 'age': 25}
async with aiohttp.ClientSession() as session:
async with session.get('https://httpbin.org/get', params=params) as response:
print(await response.text())
'''
The session also supports other request types:
session.post('https://httpbin.org/post', data=b'data')
session.put('https://httpbin.org/put', data=b'data')
session.delete('https://httpbin.org/delete')
session.head('https://httpbin.org/get')
session.options('https://httpbin.org/get')
session.patch('https://httpbin.org/patch', data=b'data')
'''
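# A minimal sketch (my addition) exercising one of the request types listed above;
# httpbin.org simply echoes the request, so it is safe to run.
async def main_put():
    async with aiohttp.ClientSession() as session:
        async with session.put('https://httpbin.org/put', data=b'data') as response:
            print('status:', response.status)
            print('body:', await response.text())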
# The returned response object
async def main2():
data = {'name': 'germey', 'age': 25}
# Some response members must be awaited and some must not. The rule: if the call returns a coroutine
# (e.g. a method defined with async), it has to be awaited. See the aiohttp API reference: https://docs.aiohttp.org/en/stable/client_reference.html
async with aiohttp.ClientSession() as session:
async with session.post('https://httpbin.org/post', data=data) as response:
print('status:', response.status)
print('headers:', response.headers)
print('body:', await response.text())
print('bytes:', await response.read())
print('json:', await response.json())
# Timeout settings
async def main3():
timeout = aiohttp.ClientTimeout(total=0.1)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get('https://httpbin.org/get') as response:
print('status:', response.status)
if __name__ == '__main__':
asyncio.run(main2())

View File

@ -0,0 +1,42 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Concurrency limiting with a semaphore, to avoid overwhelming the target site with too many simultaneous requests
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import aiohttp
import asyncio
CONCURRENCY = 5
URL = 'https://www.baidu.com/'
semaphore = asyncio.Semaphore(CONCURRENCY)
session = None
async def scrape_api():
async with semaphore:
print('scraping', URL)
async with session.get(URL) as response:
# await asyncio.sleep(1)
return await response.text()
async def main():
global session
session = aiohttp.ClientSession()
scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
await asyncio.gather(*scrape_index_tasks)
await asyncio.wait(scrape_index_tasks)
await session.close()
if __name__ == '__main__':
# asyncio.run(main())
asyncio.get_event_loop().run_until_complete(main())

View File

@ -0,0 +1,27 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:34
@Usage : Multiple coroutine tasks
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import asyncio
import requests
async def request():
url = 'https://www.baidu.com'
status = requests.get(url)
return status
tasks = [asyncio.ensure_future(request()) for _ in range(5)]
print('Tasks:', tasks)
loop = asyncio.get_event_loop()
# The five tasks run sequentially because requests.get is blocking
loop.run_until_complete(asyncio.wait(tasks))
for task in tasks:
print('Task Result:', task.result())

View File

@ -0,0 +1,33 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:34
@Usage : Multiple coroutine tasks, showing the advantage of coroutines
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo8_1 and demo9_1, demo10
'''
import asyncio
import requests
import time
start = time.time()
# Run one by one, each request takes at least 5 seconds
async def request():
url = 'https://httpbin.org/delay/5'
print('Waiting for', url)
# With or without await, requests.get blocks the event loop, so this is not truly asynchronous; aiohttp is needed for that
response = requests.get(url)
print('Get response from', url, 'response', response)
tasks = [asyncio.ensure_future(request()) for _ in range(10)]
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print('Cost time:', end - start)
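# A hedged alternative sketch (my addition, not in the referenced repo): if the blocking
# requests library must be kept, each call can be pushed onto a thread pool with
# loop.run_in_executor so the event loop itself is not blocked. Not invoked here.
async def request_in_executor():
    url = 'https://httpbin.org/delay/5'
    print('Waiting for', url)
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(None, requests.get, url)
    print('Get response from', url, 'response', response)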

View File

@ -0,0 +1,40 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:34
@Usage : Multiple coroutine tasks, showing the advantage of coroutines
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo11
'''
import asyncio
import aiohttp
import time
start = time.time()
async def get(url):
session = aiohttp.ClientSession()
response = await session.get(url)
await response.text()
await session.close()
return response
async def request():
url = 'https://httpbin.org/delay/5'
print('Waiting for', url)
response = await get(url)
print('Get response from', url, 'response', response)
tasks = [asyncio.ensure_future(request()) for _ in range(100)]
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print('Cost time:', end - start)
# Cost time: 7.670234203338623

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 17:02
@Usage :
@Desc :
'''

View File

@ -0,0 +1,29 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:20
@Usage : The asyncio library, which enables the async and await keywords
@Desc : Async crawler test - defining a coroutine
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import asyncio
async def execute(x):
print('Number:', x)
return x
# Calling the async function creates a coroutine object; it does not run yet
coroutine = execute(1)
print('Coroutine:', coroutine)
print('After calling execute')
loop = asyncio.get_event_loop()
task = loop.create_task(coroutine)
print('Task:', task)
loop.run_until_complete(task)
print('Task:', task)
print('After calling loop')
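# Note (my addition): on Python 3.7+ the get_event_loop()/run_until_complete() pair above can
# usually be replaced by a single call that creates and closes its own loop, e.g.:
# asyncio.run(execute(2))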

View File

@ -0,0 +1,38 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:20
@Usage : The asyncio library, which enables the async and await keywords
@Desc : Async crawler test - defining a coroutine and binding a callback to a task
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import asyncio
import requests
async def request():
url = 'https://www.baidu.com'
status = requests.get(url)
return status
def callback(task):
print('Status:', task.result())
coroutine = request()
task = asyncio.ensure_future(coroutine)
# Bind a callback that fires once the task completes
task.add_done_callback(callback)
print('Task:', task)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
print('Task:', task)
# Calling task.result() directly achieves a similar effect without a callback
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
print('Task:', task)
print('Task Result:', task.result())

View File

@ -0,0 +1,104 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 19:15
@Usage :
@Desc :
'''
import logging
from os.path import exists
from os import makedirs
import json
import asyncio
from pyppeteer import launch
from pyppeteer.errors import TimeoutError
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
INDEX_URL = 'https://spa2.scrape.center/page/{page}'
TIMEOUT = 10
TOTAL_PAGE = 10
RESULTS_DIR = 'results'
WINDOW_WIDTH, WINDOW_HEIGHT = 1366, 768
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
browser, tab = None, None
HEADLESS = True
async def init():
global browser, tab
browser = await launch(headless=HEADLESS,
args=['--disable-infobars', f'--window-size={WINDOW_WIDTH},{WINDOW_HEIGHT}'])
tab = await browser.newPage()
await tab.setViewport({'width': WINDOW_WIDTH, 'height': WINDOW_HEIGHT})
async def scrape_page(url, selector):
logging.info('scraping %s', url)
try:
await tab.goto(url)
await tab.waitForSelector(selector, options={
'timeout': TIMEOUT * 1000
})
except TimeoutError:
logging.error('error occurred while scraping %s', url, exc_info=True)
async def scrape_index(page):
url = INDEX_URL.format(page=page)
await scrape_page(url, '.item .name')
async def parse_index():
return await tab.querySelectorAllEval('.item .name', 'nodes => nodes.map(node => node.href)')
async def scrape_detail(url):
await scrape_page(url, 'h2')
async def parse_detail():
url = tab.url
name = await tab.querySelectorEval('h2', 'node => node.innerText')
categories = await tab.querySelectorAllEval('.categories button span', 'nodes => nodes.map(node => node.innerText)')
cover = await tab.querySelectorEval('.cover', 'node => node.src')
score = await tab.querySelectorEval('.score', 'node => node.innerText')
drama = await tab.querySelectorEval('.drama p', 'node => node.innerText')
return {
'url': url,
'name': name,
'categories': categories,
'cover': cover,
'score': score,
'drama': drama
}
async def save_data(data):
name = data.get('name')
data_path = f'{RESULTS_DIR}/{name}.json'
json.dump(data, open(data_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=2)
async def main():
await init()
try:
for page in range(1, TOTAL_PAGE + 1):
await scrape_index(page)
detail_urls = await parse_index()
for detail_url in detail_urls:
await scrape_detail(detail_url)
detail_data = await parse_detail()
logging.info('data %s', detail_data)
await save_data(detail_data)
finally:
await browser.close()
if __name__ == '__main__':
asyncio.get_event_loop().run_until_complete(main())

View File

@ -0,0 +1,111 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 15:58
@Usage : Scraping https://spa2.scrape.center/ with Selenium, end to end
@Desc : The detail-page requests on this site carry a token whose generation logic is obscure and changes over time,
so Selenium is used to drive a real browser and sidestep that logic
'''
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from os import makedirs
from os.path import exists
import logging
from urllib.parse import urljoin
import json
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
INDEX_URL = 'https://spa2.scrape.center/page/{page}'
Timeout = 10
Total_page = 10
RESULTS_DIR = 'result'
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
# Options that reduce the chance of the site detecting an automated browser
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])
options.add_experimental_option('useAutomationExtension', False)
# Explicitly set the wait timeout
browser = webdriver.Chrome(options=options)
wait = WebDriverWait(browser, Timeout)
# Fetch a page
def scrape_page(url, condition, locator):
logging.info('scraping %s', url)
try:
browser.get(url)
# Wait for the expected condition
wait.until(condition(locator))
except TimeoutException:
logging.error('error occurred while scraping %s', url, exc_info=True)
def scrape_index(page):
url = INDEX_URL.format(page=page)
# Wait until all .item children under #index are visible
scrape_page(url, EC.visibility_of_all_elements_located, locator=(By.CSS_SELECTOR, '#index .item'))
def parse_index():
titles = browser.find_elements(By.CSS_SELECTOR, '#index .item .name')
for title in titles:
href = title.get_attribute("href")
yield urljoin(INDEX_URL, href)
def scrape_detail(url):
return scrape_page(url, EC.visibility_of_element_located, (By.TAG_NAME, 'h2'))
def parse_detail():
url = browser.current_url
name = browser.find_element(By.TAG_NAME, 'h2').text
category = [element.text for element in browser.find_elements(By.CSS_SELECTOR, '.categories button span')]
cover = browser.find_element(By.CLASS_NAME, 'cover').get_attribute("src")
score = browser.find_element(By.CLASS_NAME, 'score').text
drama = browser.find_element(By.CSS_SELECTOR, '.drama p').text
return {
"url": url,
"name": name,
"category": category,
"cover": cover,
"score": score,
"drama": drama
}
def save_data(data):
name = data.get('name')
data_path = f'{RESULTS_DIR}/{name}.json'
json.dump(data, open(data_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=2)
def main():
try:
for page in range(1, Total_page + 1):
scrape_index(page)
# After the index page has loaded, collect the detail-page URLs
detail_urls = list(parse_index())
# logging.info('detail data %s', list(detail_urls))
# Visit every detail URL and extract the detail information
for detail_url in detail_urls:
scrape_detail(detail_url)
detail_info = parse_detail()
logging.info('detail info %s', detail_info)
save_data(detail_info)
finally:
browser.close()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:46
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 19:32
@Usage :
@Desc :
'''

View File

@ -0,0 +1,59 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 19:32
@Usage :
@Desc :
'''
from selenium import webdriver
from pyquery import PyQuery as pq
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import re
# Parse a name by sorting its characters by their CSS left offset to recover the correct order
def parse_name(name_html):
chars = name_html('.char')
items = []
for char in chars.items():
items.append({
'text': char.text().strip(),
'left': int(re.search('(\d+)px', char.attr('style')).group(1))
})
items = sorted(items, key=lambda x: x['left'], reverse=False)
return ''.join([item.get('text') for item in items])
# If the name is already rendered whole (.whole), skip the reordering below
def parse_name_whole(name_html):
has_whole = name_html('.whole')
if has_whole:
return name_html.text()
else:
chars = name_html('.char')
items = []
for char in chars.items():
items.append({
'text': char.text().strip(),
'left': int(re.search('(\d+)px', char.attr('style')).group(1))
})
items = sorted(items, key=lambda x: x['left'], reverse=False)
return ''.join([item.get('text') for item in items])
browser = webdriver.Chrome()
browser.get('https://antispider3.scrape.center/')
WebDriverWait(browser, 10) \
.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.item')))
html = browser.page_source
doc = pq(html)
names = doc('.item .name')
for name_html in names.items():
name = parse_name_whole(name_html)
print(name)
browser.close()

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 13:27
@Usage :
@Desc :
'''

View File

@ -0,0 +1,40 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 13:34
@Usage : Basic usage of Playwright
@Desc :
@Reference: https://github.dev/Python3WebSpider/PlaywrightTest
'''
# Playwright supports both an async API (like Pyppeteer) and a sync API (like Selenium)
import asyncio
# Sync mode
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
for browser_type in [p.chromium, p.firefox, p.webkit]:
browser = browser_type.launch(headless=False)
page = browser.new_page()
page.goto('https://www.baidu.com')
page.screenshot(path=f'screenshot-{browser_type.name}.png')
print(page.title())
browser.close()
# Async mode
from playwright.async_api import async_playwright
async def main():
async with async_playwright() as p:
for browser_type in [p.chromium, p.firefox, p.webkit]:
browser = await browser_type.launch(headless=False)
page = await browser.new_page()
await page.goto('https://www.baidu.com')
await page.screenshot(path=f'screenshot-{browser_type.name}.png')
print(await page.title())
await browser.close()
asyncio.run(main())

View File

@ -0,0 +1,34 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 14:00
@Usage :
@Desc : A powerful Playwright feature: it can record our actions in the browser and generate this code automatically (codegen)
'''
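# How code like this is generated (my note; the output file name below is just an example):
#   playwright codegen https://www.baidu.com/ -o generated_script.py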
from playwright.sync_api import Playwright, sync_playwright, expect
def run(playwright: Playwright) -> None:
browser = playwright.firefox.launch(headless=False)
# Using a context instead of the browser directly gives each context an isolated environment (resource isolation)
context = browser.new_context()
page = context.new_page()
page.goto("https://www.baidu.com/")
page.locator("#kw").click()
page.locator("#kw").fill("python")
page.get_by_role("button", name="百度一下").click()
page.get_by_role("button", name="百度一下").click()
page.locator("#kw").click()
page.locator("#kw").fill("nba")
page.get_by_role("button", name="百度一下").click()
page.close()
# ---------------------
context.close()
browser.close()
with sync_playwright() as playwright:
run(playwright)

View File

@ -0,0 +1,44 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 14:40
@Usage :
@Desc : Playwright can also emulate mobile browsers
'''
import time
from playwright.sync_api import sync_playwright
# Emulate Safari on an iPhone 12 Pro Max
with sync_playwright() as p:
iphone_12_pro_max = p.devices['iPhone 12 Pro Max']
browser = p.webkit.launch(headless=False)
context = browser.new_context(
**iphone_12_pro_max,
locale='zh-CN',
)
page = context.new_page()
page.goto('https://www.whatismybrowser.com/')
# Wait for a page state; networkidle means no more network activity
page.wait_for_load_state(state='networkidle')
page.screenshot(path='browser-info.png')
time.sleep(10)
browser.close()
with sync_playwright() as p:
iphone_12_pro_max = p.devices['iPhone 12 Pro Max']
browser = p.webkit.launch(headless=False)
context = browser.new_context(
**iphone_12_pro_max,
locale='zh-CN',
geolocation={'longitude': 116.39014, 'latitude': 39.913904},
permissions=['geolocation']
)
page = context.new_page()
page.goto('https://amap.com')
page.wait_for_load_state(state='networkidle')
page.screenshot(path='location-iphone.png')
time.sleep(10)
browser.close()

View File

@ -0,0 +1,100 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 15:12
@Usage :
@Desc : Common Playwright operations
'''
from playwright.sync_api import sync_playwright
# Event listening
def on_response(response):
print(f'Statue {response.status}: {response.url}')
# Intercept Ajax responses
def on_response1(response):
if '/api/movie/' in response.url and response.status == 200:
print(response.json())
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
# Listen for the response event; it fires every time a network request gets a response
# page.on('response', on_response)
page.on('response', on_response1)
page.goto('https://spa6.scrape.center/')
page.wait_for_load_state('networkidle')
browser.close()
# Get the page source
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto('https://spa6.scrape.center/')
page.wait_for_load_state('networkidle')
html = page.content()
print(html)
browser.close()
# Get node content
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto('https://spa6.scrape.center/')
page.wait_for_load_state('networkidle')
# Look up the a node with class "name"; passing href as the second argument returns that attribute's value
href = page.get_attribute('a.name', 'href')
print(href)
browser.close()
# Get multiple nodes
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto('https://spa6.scrape.center/')
page.wait_for_load_state('networkidle')
elements = page.query_selector_all('a.name')
for element in elements:
print(element.get_attribute('href'))
print(element.text_content())
browser.close()
# Network interception
import re
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
def canel_request(route, request):
route.abort()
page.route(re.compile(r"(\.png)|(\.jpg)"), canel_request)
page.goto("https://spa6.scrape.center/")
page.wait_for_load_state("networkidle")
page.screenshot(path='no_picture.png')
browser.close()
# After intercepting, fulfill the request with our own content
import time
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
def modify_response(route, request):
route.fulfill(path="./custom_response.html")
page.route('/', modify_response)
page.goto("https://spa6.scrape.center/")
time.sleep(10)
browser.close()

View File

@ -0,0 +1,24 @@
from playwright.sync_api import Playwright, sync_playwright, expect
def run(playwright: Playwright) -> None:
browser = playwright.firefox.launch(headless=False)
context = browser.new_context()
page = context.new_page()
page.goto("https://www.baidu.com/")
page.locator("#kw").click()
page.locator("#kw").fill("python")
page.get_by_role("button", name="百度一下").click()
page.get_by_role("button", name="百度一下").click()
page.locator("#kw").click()
page.locator("#kw").fill("nba")
page.get_by_role("button", name="百度一下").click()
page.close()
# ---------------------
context.close()
browser.close()
with sync_playwright() as playwright:
run(playwright)

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:53
@Usage :
@Desc :
'''

View File

@ -0,0 +1,30 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:53
@Usage :
@Desc : Basic Selenium usage
@Reference: https://github.dev/Python3WebSpider/SeleniumTest
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
browser = webdriver.Chrome()
try:
browser.get('https://www.baidu.com')
# input = browser.find_element_by_id('kw')  # old-style API; Selenium 4.0+ uses the form below
input = browser.find_element(By.ID, 'kw')
input.send_keys('Python')
input.send_keys(Keys.ENTER)
wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
print(browser.current_url)
print(browser.get_cookies())
print(browser.page_source)
finally:
browser.close()

View File

@ -0,0 +1,18 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:11
@Usage : Working with cookies
@Desc : Get, add and delete cookies
'''
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())
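# A small sketch (my addition): a single named cookie can also be removed with delete_cookie;
# shown commented out because all cookies were already cleared above.
# browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
# browser.delete_cookie('name')
# print(browser.get_cookies())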

View File

@ -0,0 +1,22 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:14
@Usage : Tab management
@Desc : Pages can be opened in separate tabs
'''
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
# Open a new tab
browser.execute_script('window.open()')
print(browser.window_handles)
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')

View File

@ -0,0 +1,26 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:17
@Usage : Exception handling
@Desc : Looking up a node can fail, so the resulting exceptions can be caught and handled
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException
browser = webdriver.Chrome()
try:
browser.get('https://www.baidu.com')
except TimeoutException:
print('Time out')
try:
browser.find_element(By.ID, 'hello')
except NoSuchElementException:
print('No Such Element')
finally:
browser.close()

View File

@ -0,0 +1,33 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:20
@Usage : Evading detection
@Desc : Many sites now detect Selenium and simply block the page when a Selenium-driven browser is found.
The basic principle is to check whether the window.navigator object of the current window has a webdriver property:
in a normal browser it is undefined, but Selenium sets window.navigator.webdriver.
https://antispider1.scrape.center/ uses exactly this check
'''
from selenium import webdriver
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
# Ineffective: this script runs only after the page has loaded, but the check happens before rendering
browser.execute_script('Object.defineProperty(navigator, "webdriver", {get: () => undefined})')
browser.get('https://antispider1.scrape.center/')
# Use CDP (Chrome DevTools Protocol) to solve this: run the JavaScript that clears webdriver as soon as each page starts loading
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
})
browser.get('https://antispider1.scrape.cuiqingcai.com/')

View File

@ -0,0 +1,20 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:31
@Usage : Headless mode
@Desc : The earlier examples always popped up a browser window while running;
headless mode is now supported, so no window is shown
'''
from selenium import webdriver
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_argument('--headless')
browser = webdriver.Chrome(options=option)
browser.set_window_size(1366, 768)
browser.get('https://www.baidu.com')
browser.get_screenshot_as_file('preview.png')

View File

@ -0,0 +1,22 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:59
@Usage :
@Desc : Visiting pages and locating nodes with Selenium
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input_first = browser.find_element(By.ID, 'q')
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
print(input_first, input_second, input_third)
# Multiple nodes
lis = browser.find_elements(By.CSS_SELECTOR,'.service-bd li')
print(lis)
browser.close()

View File

@ -0,0 +1,23 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:04
@Usage :
@Desc : Selenium node interaction - driving the browser to perform actions
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input = browser.find_element(By.ID, 'q')
input.send_keys('iPhone')  # type text
time.sleep(1)
input.clear()  # clear the text
input.send_keys('iPad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()  # click the search button

View File

@ -0,0 +1,23 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:08
@Usage :
@Desc : Selenium action chains - executing a series of actions in sequence
'''
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
# Simulate pressing the mouse on source and dropping it on target
actions.drag_and_drop(source, target)
actions.perform()  # actually execute the queued actions
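# The same drag expressed step by step (my addition; shown commented out so the demo above
# is not executed twice):
# ActionChains(browser).click_and_hold(source).move_to_element(target).release().perform()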

View File

@ -0,0 +1,20 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:14
@Usage :
@Desc : Running JavaScript from Selenium - for operations Selenium has no API for, JavaScript can be executed directly
'''
from selenium import webdriver
import time
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# browser.get('https://www.taobao.com')
# Scroll the page down to the bottom
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
# Pop up an alert box
browser.execute_script('alert("To Bottom")')
time.sleep(5)
browser.close()

View File

@ -0,0 +1,27 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:20
@Usage :
@Desc : Getting node information
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'https://spa2.scrape.center/'
browser.get(url)
logo = browser.find_element(By.CLASS_NAME, 'logo-image')
print(logo)
# Get an attribute
print(logo.get_attribute('src'))
# Get the text value
title = browser.find_element(By.CLASS_NAME, 'logo-title')
print(title.text)
# Get the id, location, tag name and size
print(title.id)
print(title.location)
print(title.tag_name)
print(title.size)

View File

@ -0,0 +1,26 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:31
@Usage : Switching frames
@Desc : A page can contain iframe nodes, which are essentially child pages;
after opening a page Selenium works in the parent frame by default, so switch_to.frame is needed to enter an iframe
'''
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
try:
logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
print('NO LOGO')
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)

View File

@ -0,0 +1,31 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:38
@Usage : Waits
@Desc : get() returns as soon as the page framework has finished loading,
so when it returns the browser may not yet have fully loaded the page;
when necessary we therefore make the browser wait for a while
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
# Implicit wait: not very effective, because it is a single fixed timeout while page load time depends on network conditions
browser.implicitly_wait(10)
browser.get('https://spa2.scrape.center/')
input = browser.find_element(By.CLASS_NAME, 'logo-image')
print(input)
# Explicit wait: specify the node to look for and the maximum wait time
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)
# presence_of_element_located means the node is present in the DOM
input = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# element_to_be_clickable means the button can be clicked
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(input, button)
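# A minimal extra sketch (my addition): until() also accepts any callable taking the driver,
# so a custom condition can be written as a lambda; the tag name here is only an example.
first_link = wait.until(lambda d: d.find_element(By.TAG_NAME, 'a'))
print(first_link)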

View File

@ -0,0 +1,21 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:38
@Usage : Simulating the browser back and forward buttons
@Desc :
'''
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.baidu.com/')
browser.get('https://www.taobao.com/')
browser.get('https://www.python.org/')
# Go back
browser.back()
time.sleep(1)
# Go forward
browser.forward()
browser.close()

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 20:07
@Usage :
@Desc :
'''

View File

@ -0,0 +1,28 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 20:07
@Usage :
@Desc : Font anti-scraping test
'''
from selenium import webdriver
from pyquery import PyQuery as pq
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
browser = webdriver.Chrome()
browser.get('https://antispider4.scrape.center/')
WebDriverWait(browser, 10) \
.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.item')))
html = browser.page_source
doc = pq(html)
items = doc('.item')
for item in items.items():
name = item('.name').text()
categories = [o.text() for o in item('.categories button').items()]
score = item('.score').text()
print(f'name: {name} categories: {categories} score: {score}')
browser.close()

View File

@ -0,0 +1,20 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 20:20
@Usage :
@Desc : Parse the site's CSS file to recover the characters we want from the icon classes
'''
import re
import requests
url = 'https://antispider4.scrape.center/css/app.654ba59e.css'
response = requests.get(url)
pattern = re.compile(r'\.icon-(.*?):before\{content:"(.*?)"\}')
results = re.findall(pattern, response.text)
icon_map = {item[0]: item[1] for item in results}
print(icon_map['789'])
print(icon_map['437'])

View File

@ -0,0 +1,49 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/7 20:22
@Usage :
@Desc :
'''
from selenium import webdriver
from pyquery import PyQuery as pq
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import re
import requests
url = 'https://antispider4.scrape.center/css/app.654ba59e.css'
response = requests.get(url)
pattern = re.compile(r'\.icon-(.*?):before\{content:"(.*?)"\}')
results = re.findall(pattern, response.text)
icon_map = {item[0]: item[1] for item in results}
def parse_score(item):
elements = item('.icon')
icon_values = []
for element in elements.items():
class_name = (element.attr('class'))
icon_key = re.search('icon-(\d+)', class_name).group(1)
icon_value = icon_map.get(icon_key)
icon_values.append(icon_value)
return ''.join(icon_values)
browser = webdriver.Chrome()
browser.get('https://antispider4.scrape.center/')
WebDriverWait(browser, 10) \
.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.item')))
html = browser.page_source
doc = pq(html)
items = doc('.item')
for item in items.items():
name = item('.name').text()
categories = [o.text() for o in item('.categories button').items()]
score = parse_score(item)
print(f'name: {name} categories: {categories} score: {score}')
browser.close()

Some files were not shown because too many files have changed in this diff