import os
import urllib.request
# Phase 1: scan the first 7 article-list pages of the blog and collect
# up to 350 article URLs into url[] by string-searching the raw HTML.
url = [''] * 350   # pre-sized slots for the collected article URLs
page = 1           # current list-page number (1..7)
link = 1           # running counter, used only for the progress printout
i = 0              # index of the next free slot in url[]

while page <= 7:
    # Download one paginated article-list page and decode it to text.
    con = urllib.request.urlopen(
        'http://blog.sina.com.cn/s/articlelist_1191258123_0_' + str(page) + '.html'
    ).read()
    con = con.decode()

    # Locate the first '<a title' anchor, then its href attribute,
    # then the '.html' suffix that ends the article URL.
    title = con.find(r'<a title')
    href = con.find(r'href=', title)
    html = con.find(r'.html', href)

    # BUG FIX: the original condition was `html != 1`. str.find() returns -1
    # when the pattern is missing, so the sentinel must be -1; testing against
    # 1 made the "no more .html match" case undetectable.
    while title != -1 and href != -1 and html != -1 and i < 350:
        # Slice out the URL between `href="` (6 chars after 'href=')
        # and the end of '.html' (5 chars after its start).
        url[i] = con[href + 6:html + 5]
        print(link, ' ', url[i])
        # Resume each search past the previous match.
        title = con.find(r'<a title', html)
        href = con.find(r'href=', title)
        html = con.find(r'.html', href)
        i = i + 1
        link = link + 1
    else:
        # while/else: runs when the scan loop exits without break.
        print(page, 'find end!')
    page = page + 1
else:
    print('all find end')
# Phase 2: download every collected article and save it under hanhan/,
# naming each file by the last 26 characters of its URL
# (assumed to cover the unique article id + '.html' — TODO confirm).
j = 0      # index into url[]
link = 1   # running counter, used only for the progress printout

# FIX: create the target directory up front; the original open() raised
# FileNotFoundError when hanhan/ did not already exist.
os.makedirs('hanhan', exist_ok=True)

while j < 350 and url[j] != '':
    content = urllib.request.urlopen(url[j]).read()
    # FIX: a context manager guarantees the file is closed per iteration even
    # if write() raises (the original re-bound f each pass and leaked handles).
    with open(r'hanhan/' + url[j][-26:], 'wb') as f:
        f.write(content)
    print('downloading', link, ' ', url[j])
    j = j + 1
    link = link + 1
else:
    # while/else: runs when the download loop exits without break.
    print('download article finished')