• 热门专题

Python 3.4 12306 2015年3月验证码识别

作者:  发布日期:2015-03-17 19:16:05
  • import ssl
    import json
    from PIL import Image
    import urllib
    import re
    import urllib.request as urllib2
    # Swap ssl.create_default_context for the private factory that builds a
    # context without certificate verification.
    # NOTE(review): this weakens TLS for anything in the process that calls
    # ssl.create_default_context(); whether urlopen() itself is affected
    # depends on the Python version -- confirm, and prefer a scoped context.
    if hasattr(ssl, '_create_unverified_context'):
        ssl.create_default_context = ssl._create_unverified_context
    # User-Agent header value sent with the Baidu requests below.
    UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
    # Endpoint that get_img() downloads the pass-code image from.
    pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
    def get_img():
        """Download the pass-code (captcha) image and open it.

        The bytes fetched from ``pic_url`` are written to ``./tmp.jpg`` and
        that file is then opened with ``Image.open``.

        Returns:
            The object returned by ``Image.open('./tmp.jpg')``.
        """
        # Use the response as a context manager so the connection is closed
        # deterministically instead of being left to garbage collection.
        with urllib2.urlopen(pic_url) as resp:
            raw = resp.read()
        with open('./tmp.jpg', 'wb') as fp:
            fp.write(raw)
        return Image.open('./tmp.jpg')
    def get_sub_img(im, x, y):
        """Crop a single tile out of the captcha grid.

        Args:
            im: Image-like object exposing ``crop((left, top, right, bottom))``.
            x: Column index, 0 through 3.
            y: Row index, 0 through 2.

        Returns:
            Whatever ``im.crop`` returns for the computed box.

        Raises:
            AssertionError: If ``x`` or ``y`` is outside the accepted range.
        """
        assert 0 <= x <= 3
        # NOTE(review): y == 2 is accepted here, but the script's main loop
        # only visits rows 0 and 1 -- confirm whether a third row exists.
        assert 0 <= y <= 2
        tile = 67          # side length of the cropped box
        pitch = tile + 5   # step between the origins of adjacent tiles
        left = 5 + pitch * x    # 5: horizontal offset of the first column
        top = 41 + pitch * y    # 41: vertical offset of the first row
        return im.crop((left, top, left + tile, top + tile))
    def baidu_stu_lookup(im):
        """Upload an image to Baidu image search and return its keywords.

        The image is saved to ``./query_temp_img.png``, its bytes are POSTed
        to the upload endpoint, and the body of that response is passed as
        ``queryImageUrl`` to the search page. The resulting HTML is handed to
        ``baidu_stu_html_extract``.

        Args:
            im: Image object exposing ``save(path)``.

        Returns:
            The string produced by ``baidu_stu_html_extract`` for the result
            page.
        """
        # urllib.parse is the documented home of quote(); urllib.request only
        # happens to re-export it.
        from urllib.parse import quote

        url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
        im.save("./query_temp_img.png")
        with open("./query_temp_img.png", 'rb') as fp:
            raw = fp.read()
        # The upload URL ends with "size=", so append the payload length.
        url = url + str(len(raw))
        req = urllib2.Request(url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
        with urllib2.urlopen(req) as resp:
            resp_url = resp.read()

        # resp_url is bytes; quote() accepts bytes and returns a str.
        url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + quote(resp_url)
        req = urllib2.Request(url, headers={'User-Agent': UA})
        with urllib2.urlopen(req) as resp:
            html = resp.read().decode()
        return baidu_stu_html_extract(html)
    def baidu_stu_html_extract(html):
        """Pull the recognised keywords out of a search result page.

        The page is searched for the first ``keywords:'...'`` fragment, whose
        content is treated as an escaped JSON list of objects that each carry
        a ``'keyword'`` entry.

        Args:
            html: Text of the result page.

        Returns:
            The keywords joined with ``'|'``, or ``'[UNKOWN]'`` when the
            fragment is missing, empty, or cannot be parsed.
        """
        match = re.search(r"keywords:'(.*?)'", html)
        if match is None:
            return '[UNKOWN]'
        # Undo the \x22 (double quote) and \\ (backslash) escapes so the
        # fragment becomes plain JSON text.
        json_str = match.group(1).replace('\\x22', '"').replace('\\\\', '\\')
        try:
            result = [item['keyword'] for item in json.loads(json_str)]
        except (ValueError, KeyError, TypeError):
            # A malformed payload is reported like "nothing found" so one bad
            # page does not abort the caller's loop over all tiles.
            return '[UNKOWN]'
        return '|'.join(result) if result else '[UNKOWN]'
    if __name__ == '__main__':
        # Fetch one captcha, then print the lookup result for every tile,
        # walking the grid row by row (2 rows of 4 columns).
        im = get_img()
        for y, x in ((row, col) for row in range(2) for col in range(4)):
            print((y, x), baidu_stu_lookup(get_sub_img(im, x, y)))
    
    
    

    改自https://github.com/andelf/fuck12306/blob/master/fuck12306.py

    Python 3.4 可用

About IT165 - 广告服务 - 隐私声明 - 版权申明 - 免责条款 - 网站地图 - 网友投稿 - 联系方式
本站内容来自于互联网,仅供用于网络技术学习,学习中请遵循相关法律法规