0
点赞
收藏
分享

微信扫一扫

requests库 文件上传源码解读:多字段多文件上传,单字段多文件上传

代码小姐 2022-09-20 阅读 48

先说下多字段多文件的上传

# Multi-field file upload. The value for each field may be a tuple of up to
# four items: (filename, fileobject, content_type, headers).
{
    # 2-tuple: (filename, fileobject)
    "field1" : ("filepath中的文件名称", open("filePath1", "rb")),
    # 3-tuple: (filename, fileobject, content_type)
    "field2" : ("filename2", open("filePath2", "rb"), "image/jpeg"),
    # 4-tuple: (filename, fileobject, content_type, custom_headers)
    "field3" : ("filename3", open("filePath3", "rb"), "image/jpeg", {"refer" : "localhost"})
}
# Simpler usage: map each field name straight to an open file object instead
# of a tuple; the filename is then guessed from the file object (falling back
# to the field name).
files = {
    "field1": open("filePath1", "rb"),
    "field2": open("filePath2", "rb"),
    "field3": open("filePath3", "rb"),
}
也可以使用由 (字段名, 文件) 元组组成的列表:
import requests

# Upload two fields as a list of (field_name, file) tuples. The second item of
# each tuple may be an open file object or the bytes already read from a file.
# The files are opened in a `with` block so the handles are closed once the
# request has been sent.
with open("test1.png", "rb") as png, open('a2.xlsx', 'rb') as xlsx:
    files = [
        ("field1", png),
        ('filed2', xlsx.read()),
    ]
    # Ordinary form fields go in `data`; they are sent in the same multipart
    # body as the files.
    r = requests.post(
        url='http://httpbin.org/post',
        data={"user": "tester", "signature": "md5"},
        files=files,
    )
print(r.json())

 

再说下单个字段,多个文件上传实现:

# Upload several files under ONE field name ("filed1"): repeat the same field
# name in a list of (field_name, file) tuples. A dict cannot express this
# because its keys are unique.
# The files are opened in a `with` block so the handles are closed once the
# request has been sent.
with open("test1.png", "rb") as png, open('a2.xlsx', 'rb') as xlsx:
    files = [
        ("filed1", png),
        ("filed1", xlsx),
    ]
    a = requests.post(url="http://httpbin.org/post", data=None, files=files)
print(a.text)

 

源码实现解读:

requests库> sessions.py > Session.request方法:

def request(self, method, url,
        params=None, data=None, headers=None, cookies=None, files=None,
        auth=None, timeout=None, allow_redirects=True, proxies=None,
        hooks=None, stream=None, verify=None, cert=None, json=None):
    """Build a ``Request`` from the arguments and prepare it (abridged excerpt).

    Only the construction and preparation steps are quoted here. ``timeout``,
    ``allow_redirects``, ``proxies``, ``stream``, ``verify`` and ``cert`` are
    not used by the lines shown in this excerpt.
    """

    # Create the Request.
    req = Request(
        # The HTTP method is upper-cased before being stored.
        method=method.upper(),
        url=url,
        headers=headers,
        # `files` is passed through unchanged; it is encoded later on.
        files=files,
        # Falsy `data` / `params` are replaced by an empty dict.
        data=data or {},
        json=json,
        params=params or {},
        auth=auth,
        cookies=cookies,
        hooks=hooks,
    )
    # Hand the Request to prepare_request for preparation.
    prep = self.prepare_request(req)

 

这里 prep = self.prepare_request(req) 对请求进行预处理,接着往下走,可以在 prepare_request 中看到 p.prepare() 方法的调用:

p.prepare(
method=request.method.upper(),
url=request.url,
files=request.files,
data=request.data,
json=request.json,
headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
params=merge_setting(request.params, self.params),
auth=merge_setting(auth, self.auth),
cookies=merged_cookies,
hooks=merge_hooks(request.hooks, self.hooks),
)
return p

接着再进入 prepare() 方法,可以看到其中调用了 self.prepare_body(data, files, json):

def prepare(self,
        method=None, url=None, headers=None, files=None, data=None,
        params=None, auth=None, cookies=None, hooks=None, json=None):
    """Prepares the entire request with the given parameters.

    Each argument is forwarded to the matching ``prepare_*`` step below.
    ``data``, ``files`` and ``json`` are handled together by
    ``prepare_body``, which is where file uploads get encoded.
    """

    self.prepare_method(method)
    self.prepare_url(url, params)
    self.prepare_headers(headers)
    self.prepare_cookies(cookies)
    # Body preparation: receives data, files and json in one call.
    self.prepare_body(data, files, json)
    self.prepare_auth(auth, url)

    # Note that prepare_auth must be last to enable authentication schemes
    # such as OAuth to work on a fully prepared request.

    # This MUST go after prepare_auth. Authenticators could add a hook
    self.prepare_hooks(hooks)

 接着prepare_body:

def prepare_body(self, data, files, json=None):
    """Prepares the given HTTP body data.

    Sets ``self.body`` and, depending on the branch taken, the
    Content-Length / Transfer-Encoding / Content-Type headers.

    NOTE: this is an abridged excerpt. The statements that initialise
    ``body``, ``content_type`` and ``is_stream`` are omitted here; the
    complete listing of this method appears later in the article.
    """
    if is_stream:
        # Streamed body: try to learn its length up front.
        try:
            length = super_len(data)
        except (TypeError, AttributeError, UnsupportedOperation):
            length = None

        body = data

        if getattr(body, 'tell', None) is not None:
            # Record the current file position before reading.
            # This will allow us to rewind a file in the event
            # of a redirect.
            try:
                self._body_position = body.tell()
            except (IOError, OSError):
                # This differentiates from None, allowing us to catch
                # a failed `tell()` later when trying to rewind the body
                self._body_position = object()

        if files:
            raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

        # A truthy length is sent as Content-Length; otherwise the request
        # is marked as chunked.
        if length:
            self.headers['Content-Length'] = builtin_str(length)
        else:
            self.headers['Transfer-Encoding'] = 'chunked'
    else:
        # Multi-part file uploads.
        if files:
            # _encode_files returns both the body and its content type.
            (body, content_type) = self._encode_files(files, data)
        else:
            if data:
                body = self._encode_params(data)
                # Strings and objects with `read` get no content type here;
                # any other data is labelled as url-encoded form data.
                if isinstance(data, basestring) or hasattr(data, 'read'):
                    content_type = None
                else:
                    content_type = 'application/x-www-form-urlencoded'

        self.prepare_content_length(body)

        # Add content-type if it wasn't explicitly provided.
        if content_type and ('content-type' not in self.headers):
            self.headers['Content-Type'] = content_type

    self.body = body

  

这个方法主要调用了 2 个静态方法:一个是 _encode_params(data),另一个是 _encode_files(files, data)。

@staticmethod
def _encode_files(files, data):
    """Build the body for a multipart/form-data request.

    Will successfully encode files when passed as a dict or a list of
    tuples. Order is retained if data is a list of tuples but arbitrary
    if parameters are supplied as a dict.
    The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype)
    or 4-tuples (filename, fileobj, contentype, custom_headers).

    Returns a ``(body, content_type)`` pair. Raises ``ValueError`` when
    ``files`` is empty or ``data`` is a string.
    """
    if (not files):
        raise ValueError("Files must be provided.")
    elif isinstance(data, basestring):
        raise ValueError("Data must not be a string.")

    new_fields = []
    # Normalise both inputs to lists of (key, value) pairs.
    fields = to_key_val_list(data or {})
    files = to_key_val_list(files or {})

    # Ordinary form fields are added first.
    for field, val in fields:
        # Wrap strings and non-iterables so the loop below always sees a
        # sequence of values for the field.
        if isinstance(val, basestring) or not hasattr(val, '__iter__'):
            val = [val]
        for v in val:
            # None values are skipped.
            if v is not None:
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(v, bytes):
                    v = str(v)

                new_fields.append(
                    (field.decode('utf-8') if isinstance(field, bytes) else field,
                     v.encode('utf-8') if isinstance(v, str) else v))

    # Then one part per (field name, file spec) pair. The same field name
    # may occur several times when files is a list of tuples.
    for (k, v) in files:
        # support for explicit filename
        ft = None
        fh = None
        if isinstance(v, (tuple, list)):
            # fn = filename, fp = file object or content,
            # ft = content type, fh = custom headers
            if len(v) == 2:
                fn, fp = v
            elif len(v) == 3:
                fn, fp, ft = v
            else:
                fn, fp, ft, fh = v
        else:
            # Bare value: guess the filename from it, else use the field name.
            fn = guess_filename(v) or k
            fp = v

        # str / bytes / bytearray are used as the content directly; objects
        # with `read` are read; a None entry is skipped entirely.
        if isinstance(fp, (str, bytes, bytearray)):
            fdata = fp
        elif hasattr(fp, 'read'):
            fdata = fp.read()
        elif fp is None:
            continue
        else:
            fdata = fp

        rf = RequestField(name=k, data=fdata, filename=fn, headers=fh)
        rf.make_multipart(content_type=ft)
        new_fields.append(rf)

    body, content_type = encode_multipart_formdata(new_fields)

    return body, content_type

 

@staticmethod
def _encode_params(data):
    """Encode request parameters.

    A str or bytes value, or an object with a ``read`` attribute, is returned
    unchanged. Any other iterable (for example a dict or a list of 2-tuples)
    is returned url-encoded, e.g. ``a=2&c=4``. Anything else is returned
    unchanged.
    """

    if isinstance(data, (str, bytes)):
        return data
    elif hasattr(data, 'read'):
        return data
    elif hasattr(data, '__iter__'):
        result = []
        for k, vs in to_key_val_list(data):
            # Wrap strings and non-iterables so each key maps to a sequence
            # of values.
            if isinstance(vs, basestring) or not hasattr(vs, '__iter__'):
                vs = [vs]
            for v in vs:
                # None values are dropped from the output.
                if v is not None:
                    result.append(
                        (k.encode('utf-8') if isinstance(k, str) else k,
                         v.encode('utf-8') if isinstance(v, str) else v))
        return urlencode(result, doseq=True)
    else:
        return data

  

from requests.models import RequestEncodingMixin
import json
# Feed _encode_params three kinds of input -- a JSON string, a list of
# (key, value) pairs and a dict -- and print each result in that order.
payload = {"key": 2222, "name": "test"}
pairs = [('a', 1), ('b', 2)]
encode = RequestEncodingMixin._encode_params
results = [encode(sample) for sample in (json.dumps(payload), pairs, payload)]
for encoded in results:
    print(encoded)

{"key": 2222, "name": "test"}
a=1&b=2
key=2222&name=test

到这里问题来了:什么时候使用 multipart/form-data,什么时候使用 'application/x-www-form-urlencoded'?我们回到 models.py 的 prepare_body 方法看源码:

def prepare_body(self, data, files, json=None):
    """Prepares the given HTTP body data.

    Chooses the body and content type from the arguments: JSON when there
    is no ``data`` but ``json`` is given, the raw stream when ``data`` is a
    non-list/tuple/mapping/string iterable, multipart form data when
    ``files`` is given, and url-encoded form data for other ``data``.
    The ``print`` calls are debug output added for this walkthrough.
    """

    # Check if file, fo, generator, iterator.
    # If not, run through normal process.

    # Nottin' on you.
    body = None
    content_type = None
    # With no data and a non-None json argument, content_type becomes
    # 'application/json'.

    if not data and json is not None:
        # urllib3 requires a bytes-like body. Python 2's json.dumps
        # provides this natively, but Python 3 gives a Unicode string.
        content_type = 'application/json'
        # Serialise the object to a JSON string.
        body = complexjson.dumps(json)
        # If the serialised result is not bytes, encode it as UTF-8.
        if not isinstance(body, bytes):
            body = body.encode('utf-8')

    # A stream is any iterable that is not a string, list, tuple or mapping.
    is_stream = all([
        hasattr(data, '__iter__'),
        not isinstance(data, (basestring, list, tuple, Mapping))
    ])

    if is_stream:
        try:
            length = super_len(data)
        except (TypeError, AttributeError, UnsupportedOperation):
            length = None

        body = data

        if getattr(body, 'tell', None) is not None:
            # Record the current file position before reading.
            # This will allow us to rewind a file in the event
            # of a redirect.
            try:
                self._body_position = body.tell()
            except (IOError, OSError):
                # This differentiates from None, allowing us to catch
                # a failed `tell()` later when trying to rewind the body
                self._body_position = object()

        if files:
            raise NotImplementedError('Streamed bodies and files are mutually exclusive.')

        # A truthy length is sent as Content-Length; otherwise the request
        # is marked as chunked.
        if length:
            self.headers['Content-Length'] = builtin_str(length)
        else:
            self.headers['Transfer-Encoding'] = 'chunked'
    else:
        # Multi-part file uploads.
        if files:
            print("#########enter mutil-formdata#########")
            (body, content_type) = self._encode_files(files, data)
            print('#### body of muti-formdata is %s'%body)
        else:
            if data:
                # A dict is converted to the form a=1&b=2.
                print("#### enter 'application/x-www-form-urlencoded'############")
                body = self._encode_params(data)
                print("body of 'application/x-www-form-urlencoded' is %s"%body)
                # Strings and objects with `read` get no content type here;
                # any other data is labelled as url-encoded form data.
                if isinstance(data, basestring) or hasattr(data, 'read'):
                    content_type = None
                else:
                    content_type = 'application/x-www-form-urlencoded'

        self.prepare_content_length(body)

        # Add content-type if it wasn't explicitly provided.
        if content_type and ('content-type' not in self.headers):
            self.headers['Content-Type'] = content_type

    self.body = body

requests库 文件上传源码解读:多字段多文件上传,单字段多文件上传_字段

 

 

如果传入files对象:

 

requests库 文件上传源码解读:多字段多文件上传,单字段多文件上传_字段_02

 

 

手动实现 multipart/form-data body 核心代码:

b'--f872e4372df27ae9bd51ebbecc6028d7\r\nContent-Disposition: form-data; name="key"\r\n\r\n2222\r\n--f872e4372df27ae9bd51ebbecc6028d7\r\nContent-Disposition: form-data; name="name"\r\n\r\ntest\r\n--f872e4372df27ae9bd51ebbecc6028d7\r\nContent-Disposition: form-data; name="filed1"; filename="test1.png"\r\n\r\n\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\x89\x00\x00\x02X\x08\x02\x00\x00\x00@Y4<\x00\x00\x97\x1aIDATx\x9c\xe
........ multipart/form-data; boundary=f872e4372df27ae9bd51ebbecc6028d7

  

requests库 文件上传源码解读:多字段多文件上传,单字段多文件上传_字段_03



举报

相关推荐

0 条评论