#!/usr/bin/env python
#-*- coding: utf-8 -*-
import os
from os.path import join, exists
import urllib2
def getRequest(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with an MSIE 8 ``User-Agent`` header and a
    20-second timeout.  The body is returned exactly as read from the
    response; no decoding or re-encoding is applied.

    Args:
        url: The URL to open.

    Returns:
        The response body, or ``None`` if opening or reading the URL
        raised any exception (the error is printed to stdout).
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)')
    try:
        response = urllib2.urlopen(request, timeout=20)
        try:
            return response.read()
        finally:
            # Always release the underlying connection, even if read() fails.
            response.close()
    except Exception as e:
        # Best-effort fetch: HTTPError is an Exception subclass, so HTTP
        # status failures are reported here too (str(e) includes the code).
        print("erorr %s %s" % (url, e))
        return None
def saveToFile(filepath, filename, content):
    """Write ``content`` to the file ``filename`` inside ``filepath``.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten.

    Args:
        filepath: Directory that will contain the file.
        filename: Name of the file within ``filepath``.
        content: Data to write to the file.
    """
    target = join(filepath, filename)
    with open(target, 'wb') as out:
        out.write(content)
if __name__ == '__main__':
    # Download the page and store it on disk.  getRequest() returns None
    # when the fetch fails (it has already printed the error), so skip the
    # save in that case rather than creating an empty file and crashing
    # inside f.write(None).
    htmlstr = getRequest('https://www.google.com')
    if htmlstr is not None:
        saveToFile('/home/hqman', 'google.html', htmlstr)