锐单电子商城,一站式电子元器件采购平台!
  • 电话:400-990-0325

【2021-12-17】css反爬+pytesseract文字识别

时间:2022-11-15 04:30:00 gwk40温度传感器集成电路lm2575hvn21zkn矩形连接器j30jku3e3变送器传感器lr18xbn08lum

目标网站:https://dc.simuwang.com/product/HF00003MZO.html

const crypto = require("crypto-js");

// The key script passed to decrypt() is written for a browser and stores its
// result on `window`; alias `window` to Node's global object so it can run here.
global.window = global;

/**
 * Decrypts an AES-encrypted payload using a key published by a key script.
 *
 * Steps: base64-decode `data` once, evaluate `key_js`, read the raw key from
 * `window[id]`, transform it according to `code`, then derive the AES key and
 * IV from the hex MD5 digest of the transformed key.
 *
 * @param {string} data - Base64 text; its decoded form is handed to CryptoJS as the ciphertext.
 * @param {number|string} code - Key transform selector: 3 reverses the key,
 *   4 drops its first two characters, 5 drops its last two; any other value
 *   leaves the key unchanged.
 * @param {string} key_js - JavaScript source that is evaluated to populate `window[id]`.
 * @param {string} id - Name of the `window` property holding the raw key.
 * @returns {string} The decrypted plaintext decoded as UTF-8.
 * @throws {Error} If `window[id]` is still unset after evaluating `key_js`.
 */
function decrypt(data, code, key_js, id) {
    const mode = Number.parseInt(code, 10);
    const cipherText = Buffer.from(data, "base64").toString("binary");

    // SECURITY: key_js is executed with full process privileges. Only pass
    // scripts from a source you trust; consider sandboxing (e.g. node:vm).
    eval(key_js);

    let key = window[id];
    if (key == null) {
        throw new Error(`key script did not define window["${id}"]`);
    }

    if (mode === 3) {
        key = key.split("").reverse().join("");
    } else if (mode === 4) {
        key = key.slice(2);
    } else if (mode === 5) {
        key = key.slice(0, key.length - 2);
    }

    // The 32-char hex digest is used as the AES key (as UTF-8 bytes) and its
    // second half as the IV.
    const digest = crypto.MD5(key).toString();
    const aesKey = crypto.enc.Utf8.parse(digest);
    const iv = crypto.enc.Utf8.parse(digest.slice(16, 32));

    return crypto.AES.decrypt(cipherText, aesKey, {
        iv,
        padding: crypto.pad.Pkcs7,
    }).toString(crypto.enc.Utf8);
}

module.exports = { 
       
    decrypt
}
import re
import time
import execjs
import demjson
# import asyncio
# import selenium
import requests
import pytesseract
from PIL import Image
# from pyppeteer import launch
from selenium import webdriver
from selenium.webdriver.common.by import By
# from requests_html import HTMLSession,HTML
from selenium.webdriver.chrome.options import Options




# Path to the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Chrome launch options handed to webdriver.Chrome: no visible window, GPU disabled.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')



class MI:
    """Render a page in Chrome, screenshot it and OCR the visible text.

    The browser is created with the module-level ``chrome_options``.
    ``jsCode`` is the JavaScript executed in the page before the screenshot.
    """

    def __init__(self, jsCode):
        self.jsCode = jsCode
        self.browser = webdriver.Chrome(options=chrome_options)

    def selenium_html(self):
        """Load the local demo page, run ``self.jsCode`` in it and OCR a screenshot.

        The OCR text is printed as a list of rows: each line is split on
        single spaces and only lines yielding more than three fields are kept.
        Returns None.
        """
        self.browser.get("http://127.0.0.1:3000/demo")
        # Run the script given to the constructor. The bare name ``jsCode``
        # only resolved when a module-level global of that name happened to exist.
        self.browser.execute_script(self.jsCode)

        # get_screenshot_as_file returns a success flag, not a URL.
        saved = self.browser.get_screenshot_as_file('./demo.png')
        print("%s:截图成功!!!" % saved)

        # Use the element's location and size to get its pixel bounding box.
        page = self.browser.find_element(By.TAG_NAME, "html")
        left = page.location['x']
        top = page.location['y']
        right = left + page.size['width']
        bottom = top + page.size['height']

        # Close the screenshot file handle once OCR is done.
        with Image.open('./demo.png') as screenshot:
            region = screenshot.crop((left, top, right, bottom))
            text = pytesseract.image_to_string(region)

        rows = [line.split(' ') for line in text.split('\n') if len(line.split(' ')) > 3]
        print(rows)

    def exit(self):
        """Quit the browser session."""
        self.browser.quit()


def init():
    form_data={ 
       
      "code": 5,
      "data": "
锐单商城拥有海量元器件数据手册IC替代型号,打造电子元器件IC百科大全!

相关文章