• requests库 文件上传源码解读:多字段多文件上传,单字段多文件上传


    先说下多字段多文件的上传

            # 多个字段文件上传,每个字段的值的完整格式为元组: ("filename", fileobject, "content-type", headers),后两项可省略
            {
              "field1" : ("filepath中的文件名称", open("filePath1", "rb")),
              "field2" : ("filename2", open("filePath2", "rb"), "image/jpeg"),
              "field3" : ("filename3", open("filePath3", "rb"), "image/jpeg", {"refer" : "localhost"})
            }
            ### but the simpler form below also works (the file name is then guessed from the file object):
            >> files={
              "field1" : open("filePath1", "rb"),
              "field2" : open("filePath2", "rb"),
              "field3" : open("filePath3", "rb")
            }
            也可以使用元组:
            files=  [
                (
                "field1",open("test1.png","rb")
                ),
                (
                'filed2',open('a2.xlsx','rb').read()
                )
             ]
            >> r=requests.post(url='http://httpbin.org/post',data={"user":"tester","signature":"md5"},files=files)
            >> print(r.json())
    

     

    再说下单个字段,多个文件上传实现

            # Upload several files under ONE form field ("filed1"): repeat the
            # field name in a list of (field, file object) tuples.
            # The context manager closes both files once the request has been
            # sent, instead of leaking the handles.
            with open("test1.png", "rb") as png_file, open("a2.xlsx", "rb") as xlsx_file:
                files = [
                    ("filed1", png_file),
                    ("filed1", xlsx_file),
                ]
                a = requests.post(url="http://httpbin.org/post", data=None, files=files)
            print(a.text)
    

     

    源码实现解读:

    requests库> sessions.py > Session.request方法:

        def request(self, method, url,
                params=None, data=None, headers=None, cookies=None, files=None,
                auth=None, timeout=None, allow_redirects=True, proxies=None,
                hooks=None, stream=None, verify=None, cert=None, json=None):
            """Build a ``Request`` from the arguments and prepare it.

            Excerpt: only the construction and preparation steps are shown.
            ``timeout``, ``allow_redirects``, ``proxies``, ``stream``,
            ``verify`` and ``cert`` are not used in the lines quoted here.

            ``files`` is passed through unchanged to ``Request``; ``data`` and
            ``params`` fall back to an empty dict when falsy.
            """
    
            # Create the Request.
            req = Request(
                # The HTTP method is normalised to upper case.
                method=method.upper(),
                url=url,
                headers=headers,
                files=files,
                # A falsy ``data`` (None, '', {}, []) becomes an empty dict.
                data=data or {},
                json=json,
                params=params or {},
                auth=auth,
                cookies=cookies,
                hooks=hooks,
            )
            # Hand the Request to prepare_request(); its result is kept in ``prep``.
            prep = self.prepare_request(req)
    

    这里 prep = self.prepare_request(req) 预处理接着往下走看到p.prepare()方法:

            p.prepare(
                method=request.method.upper(),
                url=request.url,
                files=request.files,
                data=request.data,
                json=request.json,
                headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
                params=merge_setting(request.params, self.params),
                auth=merge_setting(auth, self.auth),
                cookies=merged_cookies,
                hooks=merge_hooks(request.hooks, self.hooks),
            )
            return p
    

      接着再进去看到self.prepare_body(data, files, json)

        def prepare(self,
                method=None, url=None, headers=None, files=None, data=None,
                params=None, auth=None, cookies=None, hooks=None, json=None):
            """Prepares the entire request with the given parameters.

            Runs the individual ``prepare_*`` steps in a fixed order: method,
            URL (with ``params``), headers, cookies, body (``data``, ``files``,
            ``json``), auth, and finally hooks. The order matters; see the
            comments below.
            """
    
            self.prepare_method(method)
            self.prepare_url(url, params)
            self.prepare_headers(headers)
            self.prepare_cookies(cookies)
            # File uploads are handled here: ``files`` goes into prepare_body().
            self.prepare_body(data, files, json)
            self.prepare_auth(auth, url)
    
            # Note that prepare_auth must be last to enable authentication schemes
            # such as OAuth to work on a fully prepared request.
    
            # This MUST go after prepare_auth. Authenticators could add a hook
            self.prepare_hooks(hooks)
    

     接着prepare_body:

        def prepare_body(self, data, files, json=None):
            """Prepares the given HTTP body data.

            NOTE: this is an abridged excerpt. ``is_stream``, ``body`` and
            ``content_type`` are assigned in the opening part of the complete
            method, which is not reproduced in this listing.

            Streamed ``data`` is used as the body directly and cannot be
            combined with ``files``. Otherwise ``files`` are encoded by
            ``_encode_files`` and plain ``data`` by ``_encode_params``.

            :raises NotImplementedError: if ``data`` is a stream and ``files``
                is also given.
            """
            if is_stream:
                try:
                    length = super_len(data)
                except (TypeError, AttributeError, UnsupportedOperation):
                    length = None

                body = data

                if getattr(body, 'tell', None) is not None:
                    # Record the current file position before reading.
                    # This will allow us to rewind a file in the event
                    # of a redirect.
                    try:
                        self._body_position = body.tell()
                    except (IOError, OSError):
                        # This differentiates from None, allowing us to catch
                        # a failed `tell()` later when trying to rewind the body
                        self._body_position = object()

                if files:
                    raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

                # Known length -> Content-Length; unknown -> chunked transfer.
                if length:
                    self.headers['Content-Length'] = builtin_str(length)
                else:
                    self.headers['Transfer-Encoding'] = 'chunked'
            else:
                # Multi-part file uploads.
                if files:
                    (body, content_type) = self._encode_files(files, data)
                else:
                    if data:
                        body = self._encode_params(data)
                        # Strings and file-like objects are sent as-is, so no
                        # content type is set for them here.
                        if isinstance(data, basestring) or hasattr(data, 'read'):
                            content_type = None
                        else:
                            content_type = 'application/x-www-form-urlencoded'

                self.prepare_content_length(body)

                # Add content-type if it wasn't explicitly provided.
                if content_type and ('content-type' not in self.headers):
                    self.headers['Content-Type'] = content_type

            self.body = body
    

      

     这个方法主要调用了2个静态方法一个是_encode_params(data),一个_encode_files

    @staticmethod
    def _encode_files(files, data):
        """Build the body for a multipart/form-data request.

        Will successfully encode files when passed as a dict or a list of
        tuples. Order is retained if data is a list of tuples but arbitrary
        if parameters are supplied as a dict.
        The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, content_type)
        or 4-tuples (filename, fileobj, content_type, custom_headers).

        :param files: dict or list of ``(field, value)`` pairs; each value is a
            file object / string / bytes, or one of the tuples described above.
        :param data: extra form fields (dict or list of pairs); must not be a
            string.
        :returns: ``(body, content_type)`` as produced by
            ``encode_multipart_formdata``.
        :raises ValueError: if ``files`` is empty or ``data`` is a string.
        """
        if (not files):
            raise ValueError("Files must be provided.")
        elif isinstance(data, basestring):
            raise ValueError("Data must not be a string.")

        new_fields = []
        fields = to_key_val_list(data or {})
        files = to_key_val_list(files or {})

        # Plain form fields first: every value becomes one or more
        # (name, encoded value) pairs.
        for field, val in fields:
            if isinstance(val, basestring) or not hasattr(val, '__iter__'):
                val = [val]
            for v in val:
                if v is not None:
                    # Don't call str() on bytestrings: in Py3 it all goes wrong.
                    if not isinstance(v, bytes):
                        v = str(v)

                    new_fields.append(
                        (field.decode('utf-8') if isinstance(field, bytes) else field,
                         v.encode('utf-8') if isinstance(v, str) else v))

        # Then the file fields. The same field name may appear several times,
        # which is how several files are uploaded under one field.
        for (k, v) in files:
            # support for explicit filename
            ft = None
            fh = None
            if isinstance(v, (tuple, list)):
                if len(v) == 2:
                    fn, fp = v
                elif len(v) == 3:
                    fn, fp, ft = v
                else:
                    fn, fp, ft, fh = v
            else:
                # Bare value: guess the file name, falling back to the field name.
                fn = guess_filename(v) or k
                fp = v

            if isinstance(fp, (str, bytes, bytearray)):
                fdata = fp
            elif hasattr(fp, 'read'):
                # File-like object: the whole content is read here.
                fdata = fp.read()
            elif fp is None:
                continue
            else:
                fdata = fp

            rf = RequestField(name=k, data=fdata, filename=fn, headers=fh)
            rf.make_multipart(content_type=ft)
            new_fields.append(rf)

        body, content_type = encode_multipart_formdata(new_fields)

        return body, content_type
        @staticmethod
        def _encode_params(data):
            """Encode request parameters.

            A ``str`` or ``bytes`` value is returned unchanged, as is any object
            with a ``read`` attribute. Any other iterable (for example a dict or
            a list of 2-tuples) is URL-encoded into a query-string style value
            such as ``a=2&c=4``. Anything else is returned as it is.
            """
    
            if isinstance(data, (str, bytes)):
                return data
            elif hasattr(data, 'read'):
                return data
            elif hasattr(data, '__iter__'):
                result = []
                for k, vs in to_key_val_list(data):
                    # Treat a single (non-iterable or string) value as a one-item list.
                    if isinstance(vs, basestring) or not hasattr(vs, '__iter__'):
                        vs = [vs]
                    for v in vs:
                        # None values are skipped entirely.
                        if v is not None:
                            result.append(
                                (k.encode('utf-8') if isinstance(k, str) else k,
                                 v.encode('utf-8') if isinstance(v, str) else v))
                return urlencode(result, doseq=True)
            else:
                return data
    

      

    import json

    from requests.models import RequestEncodingMixin

    # Three input shapes for _encode_params: a JSON string, a list of pairs
    # and a dict.
    datas = {"key": 2222, "name": "test"}
    data = [('a', 1), ('b', 2)]

    # A string is handed back unchanged.
    d = RequestEncodingMixin._encode_params(json.dumps(datas))
    # A list of 2-tuples and a dict are both URL-encoded.
    e2 = RequestEncodingMixin._encode_params(data)
    encodes = RequestEncodingMixin._encode_params(datas)

    # One result per line, in the same order as before.
    print(d, e2, encodes, sep="\n")

    {"key": 2222, "name": "test"}
    a=1&b=2
    key=2222&name=test

     到这里问题来了,那么从源码看什么时候使用multipart/form-data,什么时候用'application/x-www-form-urlencoded'?我们回到models.prepare_body方法

    def prepare_body(self, data, files, json=None):
        """Prepares the given HTTP body data.

        How the body and Content-Type are chosen:

        * no ``data`` but ``json`` given: JSON-serialised body,
          ``application/json``;
        * ``data`` is a stream (iterable, but not a string, list, tuple or
          Mapping): used as the body directly, with ``Content-Length`` or
          chunked ``Transfer-Encoding``;
        * ``files`` given: body and content type come from ``_encode_files``
          (the multipart/form-data encoder);
        * only ``data`` given: body from ``_encode_params``; the content type
          is ``application/x-www-form-urlencoded`` unless ``data`` is a
          string or file-like object.

        The ``print`` calls trace which branch is taken.

        :raises NotImplementedError: if ``data`` is a stream and ``files``
            is also given.
        """

        # Check if file, fo, generator, iterator.
        # If not, run through normal process.

        # Nottin' on you.
        body = None
        content_type = None
        # No data but a JSON payload: content_type = 'application/json'.
        if not data and json is not None:
            # urllib3 requires a bytes-like body. Python 2's json.dumps
            # provides this natively, but Python 3 gives a Unicode string.
            content_type = 'application/json'
            # Serialise the object to a JSON string.
            body = complexjson.dumps(json)
            # If the serialised result is not bytes, encode it as UTF-8.
            if not isinstance(body, bytes):
                body = body.encode('utf-8')

        is_stream = all([
            hasattr(data, '__iter__'),
            not isinstance(data, (basestring, list, tuple, Mapping))
        ])

        if is_stream:
            try:
                length = super_len(data)
            except (TypeError, AttributeError, UnsupportedOperation):
                length = None

            body = data

            if getattr(body, 'tell', None) is not None:
                # Record the current file position before reading.
                # This will allow us to rewind a file in the event
                # of a redirect.
                try:
                    self._body_position = body.tell()
                except (IOError, OSError):
                    # This differentiates from None, allowing us to catch
                    # a failed `tell()` later when trying to rewind the body
                    self._body_position = object()

            if files:
                raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

            if length:
                self.headers['Content-Length'] = builtin_str(length)
            else:
                self.headers['Transfer-Encoding'] = 'chunked'
        else:
            # Multi-part file uploads.
            if files:
                print("#########enter mutil-formdata#########")
                (body, content_type) = self._encode_files(files, data)
                print('#### body of muti-formdata is %s'%body)
            else:
                if data:
                    # A dict is turned into "a=1&b=2" form.
                    print("#### enter 'application/x-www-form-urlencoded'############")
                    body = self._encode_params(data)
                    print("body of 'application/x-www-form-urlencoded' is %s"%body)
                    if isinstance(data, basestring) or hasattr(data, 'read'):
                        content_type = None
                    else:
                        content_type = 'application/x-www-form-urlencoded'

            self.prepare_content_length(body)

            # Add content-type if it wasn't explicitly provided.
            if content_type and ('content-type' not in self.headers):
                self.headers['Content-Type'] = content_type

        self.body = body

    如果传入files对象:

     手动实现multipart/form-data body核心代码:

    b'--f872e4372df27ae9bd51ebbecc6028d7
    Content-Disposition: form-data; name="key"
    
    2222
    --f872e4372df27ae9bd51ebbecc6028d7
    Content-Disposition: form-data; name="name"
    
    test
    --f872e4372df27ae9bd51ebbecc6028d7
    Content-Disposition: form-data; name="filed1"; filename="test1.png"
    
    x89PNG
    x1a
    x00x00x00
    IHDRx00x00x03x89x00x00x02Xx08x02x00x00x00@Y4<x00x00x97x1aIDATxx9cxe
    ........ multipart/form-data; boundary=f872e4372df27ae9bd51ebbecc6028d7
    

      

  • 相关阅读:
    visual studio 注释与取消注释
    visual studio错误之 无法将参数 1 从“const char [5]”转换为“char *”
    sdl_ffmpeg_video
    sdl_ffmpeg_audio
    visual studio添加第三方库文件,每次都要配置包含目录和库目录(繁琐),一劳永逸的办法
    visual studio 错误之errorC2362:“gototmpstr”跳过了“XXXX”的初始化操作
    visual studio调用SDL2和ffmpeg
    没用过得库函数
    visual studio编译错误之xx被声明为已否决
    vim把一个文件的若干行追加到另一个文件
  • 原文地址:https://www.cnblogs.com/SunshineKimi/p/13953580.html
Copyright © 2020-2023  润新知