Crawler
Crawler Html
#coding=utf-8
import urllib
def getHtml(url):
    """Fetch ``url`` with ``urllib`` and return the raw response body.

    The body is returned exactly as read from the response; no character
    decoding is applied here, so printing it may show garbled text when the
    page's encoding differs from the terminal's.

    :param url: address of the page to download.
    :return: the undecoded response body.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even if read() fails.
        page.close()
# Fetch the page and print the response exactly as getHtml returned it.
html = getHtml("http://www.xcar.com.cn/bbs/f738digestp1.html")
print(html)
The HTML came out garbled. Maybe I should try another library.
#coding=utf-8
import urllib2
def getHtml(url):
    """Fetch ``url`` with ``urllib2`` and return the raw response body.

    The body is returned exactly as read from the response; no character
    decoding is applied here, so printing it may show garbled text when the
    page's encoding differs from the terminal's.

    :param url: address of the page to download.
    :return: the undecoded response body.
    """
    page = urllib2.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even if read() fails.
        page.close()
# Fetch the page and print the response exactly as getHtml returned it.
html = getHtml("http://www.xcar.com.cn/bbs/f738digestp1.html")
print(html)
Also garbled, so the library is not the problem; the response bytes need to be decoded. Try something like this:
#coding=utf-8
import urllib2
def getHtml(url, encoding='gbk'):
    """Fetch ``url`` with ``urllib2`` and return the body as decoded text.

    :param url: address of the page to download.
    :param encoding: codec used to decode the response bytes. Defaults to
        ``'gbk'``, which is what this function always used before the
        parameter existed, so existing one-argument calls are unaffected.
    :return: the response body decoded with ``encoding``.
    :raises UnicodeDecodeError: if the body is not valid in ``encoding``.
    """
    page = urllib2.urlopen(url)
    try:
        raw = page.read()
    finally:
        # Release the connection even if read() fails.
        page.close()
    # Decode once, at the I/O boundary, so callers only deal with text.
    return raw.decode(encoding)
# Fetch the page and print the text returned by getHtml.
html = getHtml("http://www.xcar.com.cn/bbs/f738digestp1.html")
print(html)
It works: decoding the response bytes as GBK before printing gives readable text.