File CVE-2021-23336-only-amp-as-query-sep.patch of Package python (Revision 7c7532457b948cc36e86ff51d95885bc)
Currently displaying revision 7c7532457b948cc36e86ff51d95885bc , Show latest
390
1
From 5c17dfc5d70ce88be99bc5769b91ce79d7a90d61 Mon Sep 17 00:00:00 2001
2
From: Senthil Kumaran <senthil@uthcode.com>
3
Date: Mon, 15 Feb 2021 11:16:43 -0800
4
Subject: [PATCH] [3.6] bpo-42967: only use '&' as a query string separator
5
(GH-24297) (GH-24532)
6
MIME-Version: 1.0
7
Content-Type: text/plain; charset=UTF-8
8
Content-Transfer-Encoding: 8bit
9
10
bpo-42967: [security] Address a web cache-poisoning issue reported in
11
urllib.parse.parse_qsl().
12
13
urllib.parse will only us "&" as query string separator by default
14
instead of both ";" and "&" as allowed in earlier versions. An optional
15
argument seperator with default value "&" is added to specify the
16
separator.
17
18
Co-authored-by: Éric Araujo <merwok@netwok.org>
19
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
20
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
21
---
22
Doc/library/cgi.rst | 8 ++-
23
Doc/library/urllib.parse.rst | 22 +++++-
24
Doc/whatsnew/3.6.rst | 13 ++++
25
Lib/cgi.py | 17 +++--
26
Lib/test/test_cgi.py | 29 ++++++--
27
Lib/test/test_urlparse.py | 68 +++++++++++++------
28
Lib/urllib/parse.py | 19 ++++--
29
.../2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1 +
30
8 files changed, 134 insertions(+), 43 deletions(-)
31
create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
32
33
--- a/Doc/library/cgi.rst
34
+++ b/Doc/library/cgi.rst
35
36
37
.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]])
38
39
- Parse a query in the environment or from a file (the file defaults to
40
- ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values* and *strict_parsing* parameters are
41
- passed to :func:`urlparse.parse_qs` unchanged.
42
-
43
+ Parse a query in the environment or from a file (the file
44
+ defaults to ``sys.stdin`` and environment defaults to
45
+ ``os.environ``). The *keep_blank_values*, *strict_parsing*,
46
+ and *separator* parameters are passed to
47
+ :func:`urlparse.parse_qs` unchanged.
48
49
.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
50
51
52
Note that this does not parse nested multipart parts --- use
53
:class:`FieldStorage` for that.
54
55
+ .. versionchanged:: 3.6.13
56
+ Added the *separator* parameter.
57
+
58
59
.. function:: parse_header(string)
60
61
--- a/Lib/cgi.py
62
+++ b/Lib/cgi.py
63
64
# 0 ==> unlimited input
65
maxlen = 0
66
67
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
68
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
69
+ strict_parsing=0, separator='&'):
70
"""Parse a query in the environment or from a file (default stdin)
71
72
Arguments, all optional:
73
74
strict_parsing: flag indicating what to do with parsing errors.
75
If false (the default), errors are silently ignored.
76
If true, errors raise a ValueError exception.
77
+
78
+ separator: str. The symbol to use for separating the query arguments.
79
+ Defaults to &.
80
"""
81
if fp is None:
82
fp = sys.stdin
83
84
else:
85
qs = ""
86
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
87
- return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
88
+ return urlparse.parse_qs(qs, keep_blank_values, strict_parsing,
89
+ separator=separator)
90
91
92
# parse query string function called from urlparse,
93
94
95
def __init__(self, fp=None, headers=None, outerboundary="",
96
environ=os.environ, keep_blank_values=0, strict_parsing=0,
97
- max_num_fields=None):
98
+ max_num_fields=None, separator='&'):
99
"""Constructor. Read multipart/* until last part.
100
101
Arguments, all optional:
102
103
self.keep_blank_values = keep_blank_values
104
self.strict_parsing = strict_parsing
105
self.max_num_fields = max_num_fields
106
+ self.separator = separator
107
if 'REQUEST_METHOD' in environ:
108
method = environ['REQUEST_METHOD'].upper()
109
self.qs_on_post = None
110
111
if self.qs_on_post:
112
qs += '&' + self.qs_on_post
113
query = urlparse.parse_qsl(qs, self.keep_blank_values,
114
- self.strict_parsing, self.max_num_fields)
115
+ self.strict_parsing,
116
+ self.max_num_fields,
117
+ separator=self.separator)
118
self.list = [MiniFieldStorage(key, value) for key, value in query]
119
self.skip_lines()
120
121
122
query = urlparse.parse_qsl(self.qs_on_post,
123
self.keep_blank_values,
124
self.strict_parsing,
125
- self.max_num_fields)
126
+ self.max_num_fields,
127
+ self.separator)
128
self.list.extend(MiniFieldStorage(key, value)
129
for key, value in query)
130
FieldStorageClass = None
131
132
klass = self.FieldStorageClass or self.__class__
133
part = klass(self.fp, {}, ib,
134
environ, keep_blank_values, strict_parsing,
135
- max_num_fields)
136
+ max_num_fields,
137
+ self.separator)
138
139
# Throw first part away
140
while not part.done:
141
--- a/Lib/test/test_cgi.py
142
+++ b/Lib/test/test_cgi.py
143
144
("", ValueError("bad query field: ''")),
145
("&", ValueError("bad query field: ''")),
146
("&&", ValueError("bad query field: ''")),
147
- (";", ValueError("bad query field: ''")),
148
- (";&;", ValueError("bad query field: ''")),
149
# Should the next few really be valid?
150
("=", {}),
151
("=&=", {}),
152
- ("=;=", {}),
153
# This rest seem to make sense
154
("=a", {'': ['a']}),
155
("&=a", ValueError("bad query field: ''")),
156
157
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
158
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
159
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
160
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
161
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
162
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
163
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
164
'cuyer': ['r'],
165
166
self.assertEqual(expect[k], v)
167
self.assertItemsEqual(expect.values(), d.values())
168
169
+ def test_separator(self):
170
+ parse_semicolon = [
171
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
172
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
173
+ (";", ValueError("bad query field: ''")),
174
+ (";;", ValueError("bad query field: ''")),
175
+ ("=;a", ValueError("bad query field: 'a'")),
176
+ (";b=a", ValueError("bad query field: ''")),
177
+ ("b;=a", ValueError("bad query field: 'b'")),
178
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
179
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
180
+ ]
181
+ for orig, expect in parse_semicolon:
182
+ env = {'QUERY_STRING': orig}
183
+ fs = cgi.FieldStorage(separator=';', environ=env)
184
+ if isinstance(expect, dict):
185
+ for key in expect.keys():
186
+ expect_val = expect[key]
187
+ self.assertIn(key, fs)
188
+ if len(expect_val) > 1:
189
+ self.assertEqual(fs.getvalue(key), expect_val)
190
+ else:
191
+ self.assertEqual(fs.getvalue(key), expect_val[0])
192
+
193
def test_log(self):
194
cgi.log("Testing")
195
196
--- a/Lib/test/test_urlparse.py
197
+++ b/Lib/test/test_urlparse.py
198
199
("&a=b", [('a', 'b')]),
200
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
201
("a=1&a=2", [('a', '1'), ('a', '2')]),
202
- (";", []),
203
- (";;", []),
204
- (";a=b", [('a', 'b')]),
205
- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
206
- ("a=1;a=2", [('a', '1'), ('a', '2')]),
207
- (b";", []),
208
- (b";;", []),
209
- (b";a=b", [(b'a', b'b')]),
210
- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
211
- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
212
+ (";a=b", [(';a', 'b')]),
213
+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
214
+ (b";a=b", [(b';a', b'b')]),
215
+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
216
]
217
218
parse_qs_test_cases = [
219
220
(b"&a=b", {b'a': [b'b']}),
221
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
222
(b"a=1&a=2", {b'a': [b'1', b'2']}),
223
- (";", {}),
224
- (";;", {}),
225
- (";a=b", {'a': ['b']}),
226
- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
227
- ("a=1;a=2", {'a': ['1', '2']}),
228
- (b";", {}),
229
- (b";;", {}),
230
- (b";a=b", {b'a': [b'b']}),
231
- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
232
- (b"a=1;a=2", {b'a': [b'1', b'2']}),
233
+ (";a=b", {';a': ['b']}),
234
+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
235
+ (b";a=b", {b';a': [b'b']}),
236
+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
237
]
238
239
class UrlParseTestCase(unittest.TestCase):
240
241
"under NFKC normalization")
242
self.assertIsInstance(cm.exception.args[0], str)
243
244
+ def test_parse_qs_separator(self):
245
+ parse_qs_semicolon_cases = [
246
+ (";", {}),
247
+ (";;", {}),
248
+ (";a=b", {'a': ['b']}),
249
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
250
+ ("a=1;a=2", {'a': ['1', '2']}),
251
+ (b";", {}),
252
+ (b";;", {}),
253
+ (b";a=b", {b'a': [b'b']}),
254
+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
255
+ (b"a=1;a=2", {b'a': [b'1', b'2']}),
256
+ ]
257
+ for orig, expect in parse_qs_semicolon_cases:
258
+ result = urlparse.parse_qs(orig, separator=';')
259
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
260
+
261
+
262
+ def test_parse_qsl_separator(self):
263
+ parse_qsl_semicolon_cases = [
264
+ (";", []),
265
+ (";;", []),
266
+ (";a=b", [('a', 'b')]),
267
+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
268
+ ("a=1;a=2", [('a', '1'), ('a', '2')]),
269
+ (b";", []),
270
+ (b";;", []),
271
+ (b";a=b", [(b'a', b'b')]),
272
+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
273
+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
274
+ ]
275
+ for orig, expect in parse_qsl_semicolon_cases:
276
+ result = urlparse.parse_qsl(orig, separator=';')
277
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
278
+
279
+
280
+
281
def test_main():
282
test_support.run_unittest(UrlParseTestCase)
283
284
--- /dev/null
285
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
286
287
+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator.
288
--- a/Lib/test/test_urllib2.py
289
+++ b/Lib/test/test_urllib2.py
290
291
# level 'def urlopen()' function defined in this... (quite ugly)
292
# test suite. They use different url opening codepaths. Plain
293
# urlopen uses FancyURLOpener which goes via a codepath that
294
- # calls urllib.parse.quote() on the URL which makes all of the
295
+ # calls urlparse.quote() on the URL which makes all of the
296
# above attempts at injection within the url _path_ safe.
297
escaped_char_repr = repr(char).replace('\\', r'\\')
298
InvalidURL = httplib.InvalidURL
299
300
# level 'def urlopen()' function defined in this... (quite ugly)
301
# test suite. They use different url opening codepaths. Plain
302
# urlopen uses FancyURLOpener which goes via a codepath that
303
- # calls urllib.parse.quote() on the URL which makes all of the
304
+ # calls urlparse.quote() on the URL which makes all of the
305
# above attempts at injection within the url _path_ safe.
306
InvalidURL = httplib.InvalidURL
307
with self.assertRaisesRegexp(InvalidURL,
308
--- a/Misc/NEWS
309
+++ b/Misc/NEWS
310
311
- bpo-18167: cgi.FieldStorage no longer fails to handle multipart/form-data
312
when \r\n appears at end of 65535 bytes without other newlines.
313
314
-- bpo-17403: urllib.parse.robotparser normalizes the urls before adding to
315
+- bpo-17403: urlparse.robotparser normalizes the urls before adding to
316
ruleline. This helps in handling certain types invalid urls in a
317
conservative manner. Patch contributed by Mher Movsisyan.
318
319
320
Library
321
-------
322
323
-- bpo-7904: Changes to urllib.parse.urlsplit to handle schemes as defined by
324
+- bpo-7904: Changes to urlparse.urlsplit to handle schemes as defined by
325
RFC3986. Anything before :// is considered a scheme and is followed by an
326
authority (or netloc) and by '/' led path, which is optional.
327
328
--- a/Lib/urlparse.py
329
+++ b/Lib/urlparse.py
330
331
append(item)
332
return ''.join(res)
333
334
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
335
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0,
336
+ max_num_fields=None, separator='&'):
337
"""Parse a query given as a string argument.
338
339
Arguments:
340
341
342
max_num_fields: int. If set, then throws a ValueError if there
343
are more than n fields read by parse_qsl().
344
+
345
+ separator: str. The symbol to use for separating the query arguments.
346
+ Defaults to &.
347
"""
348
dict = {}
349
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
350
- max_num_fields):
351
+ max_num_fields, separator=separator):
352
if name in dict:
353
dict[name].append(value)
354
else:
355
dict[name] = [value]
356
return dict
357
358
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
359
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0,
360
+ max_num_fields=None, separator='&'):
361
"""Parse a query given as a string argument.
362
363
Arguments:
364
365
max_num_fields: int. If set, then throws a ValueError if there
366
are more than n fields read by parse_qsl().
367
368
+ separator: str. The symbol to use for separating the query arguments.
369
+ Defaults to &.
370
+
371
Returns a list, as G-d intended.
372
"""
373
# If max_num_fields is defined then check that the number of fields
374
# is less than max_num_fields. This prevents a memory exhaustion DOS
375
# attack via post bodies with many fields.
376
+ if not separator or (not isinstance(separator, (str, bytes))):
377
+ raise ValueError("Separator must be of type string or bytes.")
378
+
379
if max_num_fields is not None:
380
- num_fields = 1 + qs.count('&') + qs.count(';')
381
+ num_fields = 1 + qs.count(separator)
382
if max_num_fields < num_fields:
383
raise ValueError('Max number of fields exceeded')
384
385
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
386
+ pairs = [s1 for s1 in qs.split(separator)]
387
r = []
388
for name_value in pairs:
389
if not name_value and not strict_parsing:
390