Aploium Blog

Cold code, warm soul.

@Aploium11月前

02/28
17:55
编程

解析原始HTTP响应字节为requests.Response对象

直接上代码(请点下面的readmore)
需要Py3.4+ 不支持Py2

import io

class FakeSocket:
    """Minimal socket stand-in that serves pre-recorded bytes.

    It exposes only ``makefile()`` and ``close()``, which is what the code
    in this file needs when handing it to ``http.client.HTTPResponse``.
    """

    def __init__(self, data=None):
        # All "received" data lives in a single in-memory binary buffer.
        buffer = io.BytesIO(data)
        self.bytes_io = buffer

    def close(self):
        """Do nothing; there is no real connection to tear down."""
        return None

    def makefile(self, *_args, **_kwargs):
        """Return the shared in-memory buffer, ignoring mode/buffering arguments."""
        return self.bytes_io


def bytes2response(data, level=3,
                   method=None, url="http://example.com", req_headers=None, req_files=None,
                   req_data=None, req_auth=None, req_json=None
                   ):
    """
    Construct a response object from raw http response bytes (including headers)

    Warning: although we could decode raw bytes to response object,
        this is not the way these libraries were designed to be used,
        so this decoding may cause unexpected bugs.

    :param data: raw http response bytes data, including headers
    :type data: bytes
    :param level: how far the bytes are decoded
        level=0: return a tuple ``(http.client.HTTPResponse, body_bytes)``
        level=1: return a requests.packages.urllib3.response.HTTPResponse
        any other value (default: 3): return a requests.Response
    :param method: request method, passed both to http.client.HTTPResponse
        and to the fake requests.Request
    :param url: request url, passed both to http.client.HTTPResponse
        and to the fake requests.Request
    :param req_headers: ``headers`` argument of the fake requests.Request
    :param req_files: ``files`` argument of the fake requests.Request
    :param req_data: ``data`` argument of the fake requests.Request
    :param req_auth: ``auth`` argument of the fake requests.Request
    :param req_json: ``json`` argument of the fake requests.Request

    :return: see ``level``
    :rtype: requests.Response
    """
    # Imports are kept inside the function (as in the original) to avoid
    #   compatibility issues; they can be hoisted to module level for a
    #   slight performance gain.
    import http.client

    fake_socket = FakeSocket(data)
    resp_builtin = http.client.HTTPResponse(fake_socket, method=method, url=url)  # type: http.client.HTTPResponse
    # begin() parses the status line and the headers from the fake socket
    resp_builtin.begin()
    if level == 0:
        return resp_builtin, resp_builtin.read()  # type: http.client.HTTPResponse,bytes

    # requests is imported only from here on, so level=0 works with the
    #   standard library alone
    import requests.packages
    import requests.adapters

    # resolve to the urllib3 HTTPResponse that requests works with
    urllib3_pkg = requests.packages.urllib3
    response_cls = urllib3_pkg.response.HTTPResponse
    if hasattr(response_cls, "from_httplib"):
        resp_requests_basic = response_cls.from_httplib(resp_builtin)
    else:
        # Newer urllib3 releases (2.x) no longer ship from_httplib(), so the
        #   response is assembled from the same fields by hand.
        resp_requests_basic = response_cls(
            body=resp_builtin,
            # items() keeps repeated headers (e.g. several Set-Cookie lines)
            headers=urllib3_pkg._collections.HTTPHeaderDict(resp_builtin.msg.items()),
            status=resp_builtin.status,
            version=resp_builtin.version,
            reason=resp_builtin.reason,
            original_response=resp_builtin,
        )
    if level == 1:
        return resp_requests_basic  # type: requests.packages.urllib3.response.HTTPResponse

    # fake Request
    req = requests.Request(
        method=method, url=url, headers=req_headers, files=req_files,
        data=req_data, auth=req_auth, json=req_json
    )
    req = req.prepare()

    # fake adapter, which is necessary for constructing the response
    adapter = requests.adapters.HTTPAdapter()

    # resolve to the well-known requests.Response
    wellknown_resp = adapter.build_response(req, resp_requests_basic)
    # the body was already read by the urllib3 response, so hand the bytes
    #   over directly instead of letting requests read the stream again
    wellknown_resp._content = resp_requests_basic.data

    return wellknown_resp  # type: requests.Response


用法, 先看一个普通的, 网上随便找的一个网站:

raw_plain = b'HTTP/1.1 200 OK\r\nCache-Control: private\r\nContent-Length: 4418\r\nContent-Type: text/html; charset=utf-8\r\nServer: Microsoft-IIS/7.5\r\nX-AspNet-Version: 4.0.30319\r\nX-Powered-By: ASP.NET\r\nDate: Fri, 17 Feb 2017 03:05:11 GMT\r\nConnection: close\r\n\r\n\r\n\r\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\r\n<html xmlns="http://www.w3.org/1999/xhtml">\r\n<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>\r\n\tLogin : Ojas Email Management System\r\n</title><link href="css/login.css" rel="stylesheet" type="text/css" /><link href="http://fonts.googleapis.com/css?family=Open+Sans:400,300italic,300,400italic,600,600italic,700,700italic,800,800italic" rel="stylesheet" type="text/css" />\r\n    <script type="text/javascript" src="js/jquery-1.11.0.js"></script>\r\n    <script type="text/javascript" src="js/jquery.cycle2.min.js"></script>\r\n    <script type="text/javascript">\r\n        $(document).ready(function () {\r\n            $(\'.slider\').cycle({\r\n                fx: \'fade\' // choose your transition type, ex: fade, scrollUp, shuffle, etc...\r\n            });\r\n        });\r\n    </script>\r\n</head>\r\n<body>\r\n    <form name="form1" method="post" action="" id="form1">\r\n<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwULLTEyMDAzNDIwNjEPZBYCAgMPZBYCAgQPEA8WBh4NRGF0YVRleHRGaWVsZAUITWFzdE5hbWUeDkRhdGFWYWx1ZUZpZWxkBQZNYXN0SWQeC18hRGF0YUJvdW5kZ2QQFQIUY3VzdG9tZXJAb2phc2xsYy5jb20XaW5mb0Bmb3Jlc2lnaHRtZWRpYS5uZXQVAgEyATEUKwMCZ2dkZGT6jkuXGVt0B15rRmgmFhbr5WCJlwAWXnJWL7nKooXWnw==" />\r\n\r\n<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEdAAYLXxKIlVFzEacOUuQXkUOJY3plgk0YBAefRz3MyBlTcHY2+Mc6SrnAqio3oCKbxYZ5ih8pAI/1HHQJgNJssx9EN3KUI9iKk3prGGc6r3MWRaKeKEbp39eHc9mbdvkCgxCuTbPnKeSt/+KcifDxzk06/XHtxPwI8/TlPXdW7kJkuw==" />\r\n    <div class="main">\r\n        <div class="header" 
style="text-align: center;">\r\n            <div class="wrap">\r\n                \r\n                <h1 class="mTitle">\r\n                    Welcome to Ojas Email Management System</h1>\r\n                <div class="nlogo">\r\n                    </div>\r\n            </div>\r\n        </div>\r\n        \r\n        <div class="page">\r\n            <div class="wrap" style="min-height: 450px;">\r\n                <div class="loginPage">\r\n                    <div class="leftpart">\r\n                        <div class="proImg">\r\n                            <div class="slider">\r\n                                <img src="images/email.jpg" alt=""/>\r\n                            </div>\r\n                        </div>\r\n                    </div>\r\n                    <div class="rightpart">\r\n                        <div class="loginform">\r\n                            <h2>\r\n                                User Login</h2>\r\n                            <div class="form-group">\r\n                                <p>\r\n                                    User Name</p>\r\n                                <input name="txtUserName" type="text" id="txtUserName" class="form-control" />\r\n                                \r\n                            </div>\r\n                            <div class="form-group">\r\n                                <p>\r\n                                    Password</p>\r\n                                <input name="txtPassword" type="password" id="txtPassword" class="form-control" />\r\n                                \r\n                            </div>\r\n                            <div class="form-group">\r\n                                <p>\r\n                                    Email Id</p>\r\n                                <select name="ddlEmailId" id="ddlEmailId">\r\n\t<option value="2">customer@ojasllc.com</option>\r\n\t<option value="1">info@foresightmedia.net</option>\r\n\r\n</select>\r\n                     
           \r\n                            </div>\r\n                            <div class="form-group">\r\n                                <span id="lblMessage"></span>\r\n                            </div>\r\n                            <div class="form-group">\r\n                                <input type="submit" name="btnLogin" value="Login" onclick="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions(&quot;btnLogin&quot;, &quot;&quot;, true, &quot;User&quot;, &quot;&quot;, false, false))" id="btnLogin" class="btn btn-default" />\r\n                            </div>\r\n                        </div>\r\n                    </div>\r\n                </div>\r\n            </div>\r\n        </div>\r\n        <div class="footer">\r\n            Copyright \xc2\xa9\r\n            2017. All Rights Reserved.</div>\r\n    </div>\r\n    </form>\r\n</body>\r\n</html>\r\n'

# ------------ main ----------------
# Decode the plain (uncompressed) sample captured above in `raw_plain`.
resp = bytes2response(raw_plain, level=3)

print(resp.headers)
print(resp.text)


因为requests自身带有对gzip的支持, 所以能直接解码gzip的响应

raw_gzipped = (
    "SFRUUC8xLjEgMjAwIE9LDQpEYXRlOiBGcmksIDE3IEZlYiAyMDE3IDAzOjA1OjAw"
    "IEdNVA0KU2VydmVyOiBBcGFjaGUvMi40LjE4IChVYnVudHUpDQpMYXN0LU1vZGlm"
    "aWVkOiBNb24sIDE4IEFwciAyMDE2IDE2OjMyOjUwIEdNVA0KRVRhZzogIjJjZjYt"
    "NTMwYzRlOTVjNjg3MC1nemlwIg0KQWNjZXB0LVJhbmdlczogYnl0ZXMNClZhcnk6"
    "IEFjY2VwdC1FbmNvZGluZw0KQ29udGVudC1FbmNvZGluZzogZ3ppcA0KQ29udGVu"
    "dC1MZW5ndGg6IDMyNTYNCkNvbm5lY3Rpb246IGNsb3NlDQpDb250ZW50LVR5cGU6"
    "IHRleHQvaHRtbA0KDQofiwgAAAAAAAADxVrrc9s2Ev/uvwJVp9MkJ5GWnbqyIruT"
    "+DHpTNp4UrV3/eSDSEjCGCJYAJSsPv732wVAik8pSZucJo4kEtgXdn/7EI8mX1y/"
    "vZr+endDlmYlyN3Pr958f0V6gzD89+lVGF5Pr8l/Xk9/eEOGwTGZKppobrhMqAjD"
    "mx97pLc0Jh2H4WazCTangVSLcPoufERaQ9zsPw5MaWcQm7h3eTSxDB9XItEXLWSG"
    "5+fnbjesJWTyxWAAb4T8IGM+5ywmcyVXxCwZuWYzThMiFV9wIE/mUpGfZ1liMrvh"
    "DdWGZGlMDYvH5OR4+HxwfDoYntubPzE2JshcA3dBsyRapjQOEmbCWbbQ4fBkNDo7"
    "P4a1g4EVY8lofGm3TlbMULt3wH7L+PqidyUTwxIzmG5T1iOR+3bRM+zRhKjICxIt"
    "qdLMXPw8vR2MeiT0lAw3gl2+TGm0ZCdedlBrTjNhyB1dgIzfG7KR6kFPQrfYbdRm"
    "KxgxwM+zibTukRWLOb3o6UgxlljrPSN/2A0rqsBGY3KcPpb/XtiboHjMk0Xr3b+O"
    "4L+ZjLd95yd/VHecwsrS34sje3tGo4eFklkSDyIppBqTL69H169uTvz9OVhoMKcr"
    "LrZj8gtTMU1on2jwlIFmis9f7FZp/jsYYThMjbuIyg6o4AtQJgIrM1VIGfN1sKI8"
    "uU/BcLmg0jnfmCgmqOFr5sjEXKeCAndDZ4J5sTY8NssxGR0fF5o4uw1m0hi5GjsV"
    "S9cFm5sxoZmRlcvgkMvK9W4LO3tJFTM18PxPcib+cm7Ck+HJt6ejyi3rBmOipeBx"
    "t+1v7cvfP2BAtN09+jpT3oRL5rQ5P8/l8nIOj4+/2sP0m9uz2287aesUIrfqm8Nv"
    "Ssb5Zmedsh+MkGVxceNFm0kRd3Liq0WN0WmJz/P6KYxbIwCJWle5l/N7H+Dak40E"
    "o7ANfSH3GvCB/DAbzvQBQdN5cI7XPtnuuWGrTgGbJ5jL9rzdQQ/Ill88tq9mpL6v"
    "vLR2Umclac5qx+F33msWYYS3nPE/bOMaw3tcXMdDNN8I3cq97zFP2auPnVfv55Qq"
    "VtNx5C0zagXzkb8zqohShZphDWo8nsQAdyxuBaFchUNhX9LR0eQG7Bq9h5oVJQvk"
    "PXD6bm8m+t03Bd93WMM6g3xvBQsree/Mu9fZPvca3ZxfvTqp+UHuc51AVkt+J1+9"
    "V/KrinyvoFb6o0uuq+uT4fPbfdsXim2795/fnp+Ozor9wVxISK/J4p4JtgLRDuRf"
    "u/wDcKG/79hz1LDmiVkkFXU8E5mwPVY+yHUsePLQP7RozUFHFh9cRyPUP8fkBiQc"
    "lmYp14UnthxJJTSbrrY/dKq6diwpabpvWUXRtlrw6vr25qwTGw8J+jFmyFmWqK8h"
    "kKA3kI7UX66uDi1a2YI/zCv+CZa/vu6GjZBOqYbOpSg1e+5e9W65AqnHRrEBtmB1"
    "olV00Qs5qKvDzPYAAyEXMkiTRY9QAZ2E7wzewNVezqFBNSyRteVV18LdOkL29B47"
    "aiGSK7QMQU3o4qAva6rdrJH2KV/aWIPbFiyqiD19+erNDXl7S67e/ji9+XH6U0lY"
    "K14bj47A2mufCXR6is0vel/Smczg3kt8m4T08tNzhKYxWTDdu7xyHz4PVx1J6GIv"
    "f8K3z8NxzoXVUiZzviD2Wzdj/8V15XU56jVhiwBHH+F+kEmrouNOHl/0vE/ULEOK"
    "pv2L9zFeG8xV2aXlb+41XXJN4B/OQWIftBsmIrlixPa/mYb0byQkRW3sKkiLCjiQ"
    "Bik4Z5cwiZzblTkmQC+OSEvnBtuoREMFJ/zCJAcMvdVw0jpoUAUTgHgzimLAcqRr"
    "pyVUYHVgRYTLboTTdzOdzZJHS7vSE3eCNEjD1QcKxeECOcD5QLKJWwSYk63MSARA"
    "qOAYgS6sRr59qEXJitEErUdNSWXyejq9y9X2CoP41DSIW2KYDFEEPGqUJlVoSrEN"
    "yK/AWC9lJmLIHpeKpYJGzG1C956Es0vyRMgIR1Nt5CfGXIZrqnAuZidIIU9i9hjg"
    "x0kIN5+SGZtLxeywiScZsofDdkfJUHFVVqZsnUmYlqOg3b288ShwoFBGqRUV6FHK"
    "uQjqzGZOf5rE0C8kXxvykMgNnKG1qDc1WKdB2sZM369RcgbQsa0fR27ZKAOfTQws"
    "yBK6plwgzpA4A1vKBmFMyBBINIlYqzcgYWQIUEDATppro/skheZYOztC3VJw/1oT"
    "GkMfD4sUFglNC+6+dUZ2FUbaIaQA+QaIuJdDxcxH6Fs4zDVnm0+GKy7yQPscBHJw"
    "iSpyYODx+Zzh6RQD2QaxLAXrMbpqJ9K3vqNTAeEIJyfBV0E7nONiBgBfNnwFDVCM"
    "c90GaTxqRR3Kb7hZ5pBhpBQ6AHhkNYkdULU5JEToPBPgY7GMMswSwJInjWVhplWo"
    "lxASISwMqbNP+O7m5fUPN4EDsmDxO8Z2QN4xsI2FXxvzUGSiYyGbBt2cqxUzINfl"
    "r8VOG24el4xmYm5xbdY0+RxrYTLbEhpFTGuLC7C/SLbhiiYZFb1L9455lnAXGwg6"
    "XqsBCGVxpgN7QSCqSwhpAR4R0aNNPVj2+1zzsAQF9DFWe1Azd8WyEfZmI3RPimYX"
    "Qm70eJ80Cor9kJndeR79CXWt/4IdyPzoT7/4v1jwplIZ7a/D95WM9QAwB0AlLhbi"
    "jWcBlB5xZe+z3Tb80NhWW4Mo1KRdWlTRSrGKXplomrm4KXj3TULKfmAZWUfIaw0E"
    "2eph7SOFkRxgKZBmxu1POQO/hMhYMPiq0FN5EokMxyrQ6wjI1cgCv30QF8hISwZR"
    "bqjCgg+gpxY3zZSwsx4a5G+Za+cVhbGo2NCt9sqVfrXaR6pp3J0FgWRe0sUMsA9y"
    "E7MqCkhRzBrMStGKlqUXCCRX3ryJywzaIXFRnnhsgeyrjXQQTJMtoHFLXv2HjHjV"
    "UBpNV8BSOcxCa+L+oSMph5jbYpXEW5XI8vdijsWxVBwY21qgJQOUXim6WZQJqupp"
    "JuFpyoz2xSygLKIlyJ+BSn2yEFDwiA9wbUUXmA1gK2DhmisDoE2WUtdyKdxXTKfM"
    "Tl6gAv1U5wRQvXVVIXKy5StEsN6ucHpkQziv0vZRabq4LuKDq5Im+4g8GxS8QqCY"
    "YUGAx+KyP5R0vgCfMX8K8T5qoEVmsyVWzksmsEA85GL77rtFedb1v3KDHFgV6yB2"
    "1QL+3B0t+GDGE7wXwOfvfsuY2l7QE5aA0/Qu/QfM03td/m/zi7n2DN2ngxzb64PP"
    "ayKMZGcj/PRZjJSz9B8P8zxoJ/sCbPo/2xKD0tnS5bHPYMucpf9YGfq0cUNLBvjo"
    "RgMoMMRtx+mKbciQAA22jp9j82pL60+IiQQUpGprW1Y/M/AFFLm2zarNZJC/oX/e"
    "R4olAPEysT+gQPvPEdwA2n0m7OqifHmcMBZjZdXWFpRetkSCvAe5HTJV7LonO3DA"
    "MhhKLxPEeTXsciMYtFQTRsaNILrtSWxPdQWGQEC1pLF3Qieo0HU5FxqvDYfCL5Fu"
    "aGaHI1aoA9VSqzkOnXL5e7lI9n30h7bxpUYeWiYlZds00L3yxo68g1X6vRgf7ODb"
    "+6lX29w2/bwxiiVEBlqYYkvkm0MwsJLZYllUyjMF7ZLtW5tNMltdQhE4CeHdFYkU"
    "k62bB0lw7HyaBb5anl51FGt14MC3OHDeYVEDrKnDk+B5CKkI/+5x+gQOE7gHw9Js"
    "Jnh07+ZhLahRLuee2M7Al3tPiypw1887b3yCwAFWaA6s0hR4uTLraeBHY6qYUbmy"
    "gVoD5r08QU9oEMpNxIRmIBNUUU90BkWibaWdTFqt/XwPx28rurVRjUEN9aRhWOtb"
    "7p1TBMu5UH9bEC63uI3mrho0tR65u2fPA7DwsbIIYJnGFNMD+K8ya9DDlmNFH/zo"
    "Um6SSrELVk5wLp9TC4r5dzGFao5oJT4yseYy0/jjM0758qo8VXLNY/g6Y8a4cX+m"
    "uNkSHDr4IfhMPu4xy0fCRQEW+IxhJ1K8Y9jJoVfduXFlAy3a2H7UuO/OTT8xM+W9"
    "lv8FEiR0cYETNTSmslIRlNwnpAa1fFBTDIl2g7mAvJYbnO/1CSyJHgABGtt3iIAP"
    "ZSKjoPpkphMt/JcGH4lY7se9S/YIcQH2alAEGl5u+4tSPjRXhYEpxNcGlzXmu+9l"
    "t7JNsBThcx6hrXy/t4vvu9d3FngkDj300zZnLRUz3n6Q+RGyffXQGAF2ipw7ZzHl"
    "L3lN+WPJeYofw73H7HSuI3WxMn9e1x7ndxA/FwqHnujxu1+1m4/3fm9/5rZUBu4Z"
    "4WP/I/cveK3joeOefwzxonc67LnH1y56I/skbYH/hQ0KHaGUsD/dTyz6gEX+B+6y"
    "eSz2LAAA"
)
import base64
# The gzipped sample is stored base64-encoded; turn it back into raw bytes.
raw_bytes = base64.b64decode(raw_gzipped)

# --------- main -------------
resp = bytes2response(raw_bytes, level=3)

print(resp.headers)
print(resp.text)


解析原始HTTP响应字节为requests.Response对象

来一发吐槽叭O(∩_∩)O    仅首次吐槽时需要审核