使用 Python 时遇到 UnicodeDecodeError: 'gb2312' codec can't decode byte 0x88 in position 164111: illegal multibyte sequence
# 基金抓取
from urllib import request
import chardet
# URL of the page that is passed to getHtml at the end of this script.
page1_url = "http://fund.eastmoney.com/fund.html"
def getHtml(pageUrl):
    """Download ``pageUrl`` and print its body as text.

    The charset is whatever ``chardet`` guesses from the raw bytes, and the
    bytes are decoded strictly, so a byte sequence that is invalid in the
    guessed charset raises ``UnicodeDecodeError``.
    """
    payload = request.urlopen(pageUrl).read()
    # chardet.detect returns a dict; only its 'encoding' entry is used here.
    guess = chardet.detect(payload)
    charset = guess['encoding']
    print(payload.decode(charset))
# Script entry: fetch page1_url and print the decoded HTML.
getHtml(page1_url)
怎么办?大概意思是网页中含有非法字符,解码时需要加上 'ignore' 参数:
# 基金抓取
from urllib import request
import chardet
# URL of the page that is passed to getHtml at the end of this script.
page1_url = "http://fund.eastmoney.com/fund.html"
def getHtml(pageUrl):
    """Download ``pageUrl``, decode its body to text and print it.

    The charset is guessed from the raw bytes with ``chardet``. Undecodable
    bytes are dropped (``errors='ignore'``) instead of raising
    ``UnicodeDecodeError``.

    Args:
        pageUrl: URL to open with ``urllib.request.urlopen``.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # Use the response as a context manager so the connection is closed
    # even when reading fails.
    with request.urlopen(pageUrl) as response:
        raw_html = response.read()

    # chardet reports None when it cannot guess; decode(None, ...) would
    # raise TypeError, so fall back to UTF-8.
    encoding = chardet.detect(raw_html)['encoding'] or 'utf-8'

    # Pages detected as GB2312 frequently contain GBK/GB18030-only characters
    # (e.g. lead byte 0x88). Decoding with the superset codec keeps those
    # characters instead of silently discarding them.
    if encoding.lower() in ('gb2312', 'gbk'):
        encoding = 'gb18030'

    # 'ignore' is still applied so genuinely malformed bytes do not abort
    # the decode.
    src = raw_html.decode(encoding, 'ignore')
    print(src)
# Script entry: fetch page1_url and print the decoded HTML.
getHtml(page1_url)