Python 获取网站状态码

原创 赤水  2015年12月24日 07:17 阅读 130 次

检验 Web 返回的状态码。以下是主要代码,留作备用,方便以后复制使用。

 

#!/usr/bin/python
# coding=utf-8
import traceback
import urllib2,re

def FindStatus(ErrorInfo):
    """Extract a recognised HTTP status code from an error message.

    The argument is converted with ``str()`` and scanned for standalone
    three-digit numbers. The first one found in the list of recognised
    status codes is returned as an ``int``.

    Args:
        ErrorInfo: The error to inspect; any object accepted by ``str()``.

    Returns:
        The matching status code as an ``int``, or the message string
        unchanged when it contains no recognised code.
    """
    ErrorInfo = str(ErrorInfo)
    Status = frozenset([
        '301', '302', '303', '304', '305', '306', '307',
        '400', '404',
        '500', '501', '502', '503', '509', '510',
    ])
    # The lookarounds restrict matches to numbers that are exactly three
    # digits long, so "4040" or "5000" is not misread as 404 / 500.
    for Code in re.findall(r'(?<!\d)\d{3}(?!\d)', ErrorInfo):
        # Keep scanning past unrelated numbers (e.g. an errno) instead of
        # giving up after the first three-digit match.
        if Code in Status:
            return int(Code)
    return ErrorInfo

def WebCheck(URL):
    """Request ``URL`` and report the HTTP status of the response.

    The request is sent with a Firefox ``User-Agent`` header and a
    10-second timeout, and the response body is read in full before the
    status is returned.

    Args:
        URL: The address to request.

    Returns:
        The response status code on success. If ``urllib2`` raises an
        ``HTTPError``, the code carried by that error. For any other
        exception, the result of ``FindStatus`` applied to the error text
        (an ``int`` when a recognised code is found, otherwise the text).
    """
    try:
        REQ = urllib2.Request(URL)
        REQ.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:42.0) Gecko/20100101 Firefox/42.0')
        DATA = urllib2.urlopen(REQ, timeout=10)
        try:
            DATA.read()
            return DATA.code
        finally:
            # Release the connection whether or not reading succeeded.
            DATA.close()
    except urllib2.HTTPError as e:
        # HTTPError exposes the status directly; no need to parse its text.
        return e.code
    except Exception as e:
        return FindStatus(str(e))


if __name__ == "__main__":
    # Script entry point: check one site and print the resulting status.
    print(WebCheck('https://mas.chinapnr.com'))

Python 模拟浏览器实现 GET 请求

#!/usr/bin/python
import urllib
import traceback,urllib2

def HttpGet(HttpUrl):
    """Fetch ``HttpUrl`` with a browser-like User-Agent and print the body.

    Args:
        HttpUrl: The address to request.

    Returns:
        ``None`` after the body has been printed, or ``False`` if any
        exception was raised (the traceback is printed in that case).
    """
    try:
        Request = urllib2.Request(HttpUrl)
        Request.add_header("User-Agent","Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:43.0) Gecko/20100101 Firefox/43.0")
        Response = urllib2.urlopen(Request)
        try:
            Body = Response.read()
        finally:
            # Close the connection even when reading the body fails.
            Response.close()
        print(Body)

    except Exception:
        print(traceback.format_exc())
        return False

if __name__ == '__main__':
    # Script entry point: fetch one page and print its body.
    # Alternative target kept for reference:
    # HttpGet("http://127.0.0.1:8080")
    HttpGet("https://lssin.com")

 

 

 

 

 

 


本文地址: http://blog.lssin.com/readblog/36.html
版权声明:本文为原创文章,版权归  赤水 所有,欢迎分享本文,转载请保留出处!

发表评论


表情