self_example/Spider/Chapter08_验证码的识别/深度学习识别图形验证码/encoding.py

66 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/12 12:54
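@Usage : encode(text) -> flat one-hot label vector; decode(vec) -> text
@Desc : One-hot encoding and decoding of captcha label text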
'''
# -*- coding: UTF-8 -*-
import numpy as np
import setting
def encode(text):
    """One-hot encode captcha text into a flat label vector.

    The vector consists of ``setting.MAX_CAPTCHA`` consecutive slots of
    ``setting.ALL_CHAR_SET_LEN`` entries each. For the character ``c`` at
    position ``i`` the entry ``i * ALL_CHAR_SET_LEN + char2pos(c)`` is set
    to 1.0; every other entry stays 0.0.

    Character positions inside a slot:
    '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61, '_' -> 62.

    Args:
        text: Iterable of single characters (normally a ``str``).

    Returns:
        A 1-D float ``numpy.ndarray`` of length
        ``ALL_CHAR_SET_LEN * MAX_CAPTCHA``.

    Raises:
        ValueError: If a character is outside the supported set, or if its
            position does not fit into one slot of ``ALL_CHAR_SET_LEN``
            entries.
        IndexError: If ``text`` holds more than ``MAX_CAPTCHA`` characters
            (raised by the NumPy item assignment).
    """
    slot_len = setting.ALL_CHAR_SET_LEN
    vector = np.zeros(slot_len * setting.MAX_CAPTCHA, dtype=float)

    def char2pos(c):
        """Return the position of character ``c`` inside one slot."""
        if c == '_':
            return 62
        # ord() returns the code point of a single character;
        # 48, 65 and 97 are the code points of '0', 'A' and 'a'.
        code = ord(c)
        if ord('0') <= code <= ord('9'):
            return code - ord('0')
        if ord('A') <= code <= ord('Z'):
            # +10 because positions 0-9 are taken by the digits.
            return code - ord('A') + 10
        if ord('a') <= code <= ord('z'):
            # +26 because the uppercase letters occupy 26 positions.
            return code - ord('a') + 26 + 10
        # Explicit range checks replace the former subtraction cascade,
        # which silently mapped characters between the ranges (':' -> 3,
        # '[' -> 30) and produced negative positions for characters
        # below '0'.
        raise ValueError('error')

    for i, c in enumerate(text):
        pos = char2pos(c)
        if pos >= slot_len:
            # Without this check the 1.0 would land in the slot that
            # belongs to the next character.
            raise ValueError(
                'character %r (position %d) does not fit into a slot of '
                '%d entries' % (c, pos, slot_len)
            )
        vector[i * slot_len + pos] = 1.0
    return vector
def decode(vec):
    """Turn a one-hot label vector back into its text.

    Each non-zero entry of ``vec`` contributes one character, taken in the
    order the indices come back from ``vec.nonzero()[0]``. The index modulo
    ``setting.ALL_CHAR_SET_LEN`` selects the character:
    0-9 -> '0'-'9', 10-35 -> 'A'-'Z', 36-61 -> 'a'-'z', 62 -> '_'.

    Args:
        vec: Array-like object providing ``nonzero()``; presumably the 1-D
            vector produced by ``encode`` -- confirm against callers.

    Returns:
        The decoded characters joined into a ``str``.

    Raises:
        ValueError: If an index maps to none of the ranges above.
    """
    # (exclusive upper bound, amount added to the index to get the code point)
    ranges = (
        (10, ord('0')),
        (36, ord('A') - 10),
        (62, ord('a') - 36),
    )
    chars = []
    for flat_index in vec.nonzero()[0]:
        offset = flat_index % setting.ALL_CHAR_SET_LEN
        for upper, shift in ranges:
            if offset < upper:
                code = offset + shift
                break
        else:
            # No range matched: only the underscore position is left.
            if offset != 62:
                raise ValueError('error')
            code = ord('_')
        chars.append(chr(code))
    return "".join(chars)
if __name__ == '__main__':
    # Round-trip demo: encode a sample captcha text and print what
    # decode() recovers from the resulting vector.
    print(decode(encode("BK7H")))