File CVE-2023-27043-email-parsing-errors.patch of Package python (Revision 7c7532457b948cc36e86ff51d95885bc)
Currently displaying revision 7c7532457b948cc36e86ff51d95885bc , Show latest
xxxxxxxxxx
1
---
2
Doc/library/email.utils.rst | 19
3
Lib/email/test/test_email.py | 192 +++++++++-
4
Lib/email/test/test_email_renamed.py | 50 ++
5
Lib/email/utils.py | 155 +++++++-
6
Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8
7
5 files changed, 393 insertions(+), 31 deletions(-)
8
9
--- a/Doc/library/email.utils.rst
10
+++ b/Doc/library/email.utils.rst
11
12
begins with angle brackets, they are stripped off.
13
14
15
-.. function:: parseaddr(address)
16
+.. function:: parseaddr(address, strict=True)
17
18
Parse address -- which should be the value of some address-containing field such
19
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
20
*email address* parts. Returns a tuple of that information, unless the parse
21
fails, in which case a 2-tuple of ``('', '')`` is returned.
22
23
+ If *strict* is true, use a strict parser which rejects malformed inputs.
24
+
25
+ .. versionchanged:: 3.13
26
+ Add *strict* optional parameter and reject malformed inputs by default.
27
+
28
29
.. function:: formataddr(pair)
30
31
32
second element is returned unmodified.
33
34
35
-.. function:: getaddresses(fieldvalues)
36
+.. function:: getaddresses(fieldvalues, strict=True)
37
38
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
39
*fieldvalues* is a sequence of header field values as might be returned by
40
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
41
- example that gets all the recipients of a message::
42
+ :meth:`Message.get_all <email.message.Message.get_all>`.
43
+
44
+ If *strict* is true, use a strict parser which rejects malformed inputs.
45
+
46
+ Here's a simple example that gets all the recipients of a message::
47
48
from email.utils import getaddresses
49
50
51
resent_ccs = msg.get_all('resent-cc', [])
52
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
53
54
+ .. versionchanged:: 3.13
55
+ Add *strict* optional parameter and reject malformed inputs by default.
56
+
57
58
.. function:: parsedate(date)
59
60
--- a/Lib/email/test/test_email.py
61
+++ b/Lib/email/test/test_email.py
62
63
+# -*- coding: utf-8 -*-
64
# Copyright (C) 2001-2010 Python Software Foundation
65
# Contact: email-sig@python.org
66
# email package unit tests
67
68
[('Al Person', 'aperson@dom.ain'),
69
('Bud Person', 'bperson@dom.ain')])
70
71
+ def test_parsing_errors(self):
72
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
73
+ alice = 'alice@example.org'
74
+ bob = 'bob@example.com'
75
+ empty = ('', '')
76
+
77
+ # Test Utils.getaddresses() and Utils.parseaddr() on malformed email
78
+ # addresses: default behavior (strict=True) rejects malformed address,
79
+ # and strict=False which tolerates malformed address.
80
+ for invalid_separator, expected_non_strict in (
81
+ ('(', [('<%s>' % bob, alice)]),
82
+ (')', [('', alice), empty, ('', bob)]),
83
+ ('<', [('', alice), empty, ('', bob), empty]),
84
+ ('>', [('', alice), empty, ('', bob)]),
85
+ ('[', [('', '%s[<%s>]' % (alice, bob))]),
86
+ (']', [('', alice), empty, ('', bob)]),
87
+ ('@', [empty, empty, ('', bob)]),
88
+ (';', [('', alice), empty, ('', bob)]),
89
+ (':', [('', alice), ('', bob)]),
90
+ ('.', [('', alice + '.'), ('', bob)]),
91
+ ('"', [('', alice), ('', '<%s>' % bob)]),
92
+ ):
93
+ address = '%s%s<%s>' % (alice, invalid_separator, bob)
94
+ self.assertEqual(Utils.getaddresses([address]),
95
+ [empty])
96
+ self.assertEqual(Utils.getaddresses([address], strict=False),
97
+ expected_non_strict)
98
+
99
+ self.assertEqual(Utils.parseaddr([address]),
100
+ empty)
101
+ self.assertEqual(Utils.parseaddr([address], strict=False),
102
+ ('', address))
103
+
104
+ # Comma (',') is treated differently depending on strict parameter.
105
+ # Comma without quotes.
106
+ address = '%s,<%s>' % (alice, bob)
107
+ self.assertEqual(Utils.getaddresses([address]),
108
+ [('', alice), ('', bob)])
109
+ self.assertEqual(Utils.getaddresses([address], strict=False),
110
+ [('', alice), ('', bob)])
111
+ self.assertEqual(Utils.parseaddr([address]),
112
+ empty)
113
+ self.assertEqual(Utils.parseaddr([address], strict=False),
114
+ ('', address))
115
+
116
+ # Real name between quotes containing comma.
117
+ address = '"Alice, alice@example.org" <bob@example.com>'
118
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
119
+ self.assertEqual(Utils.getaddresses([address]), [expected_strict])
120
+ self.assertEqual(Utils.getaddresses([address], strict=False), [expected_strict])
121
+ self.assertEqual(Utils.parseaddr([address]), expected_strict)
122
+ self.assertEqual(Utils.parseaddr([address], strict=False),
123
+ ('', address))
124
+
125
+ # Valid parenthesis in comments.
126
+ address = 'alice@example.org (Alice)'
127
+ expected_strict = ('Alice', 'alice@example.org')
128
+ self.assertEqual(Utils.getaddresses([address]), [expected_strict])
129
+ self.assertEqual(Utils.getaddresses([address], strict=False), [expected_strict])
130
+ self.assertEqual(Utils.parseaddr([address]), expected_strict)
131
+ self.assertEqual(Utils.parseaddr([address], strict=False),
132
+ ('', address))
133
+
134
+ # Invalid parenthesis in comments.
135
+ address = 'alice@example.org )Alice('
136
+ self.assertEqual(Utils.getaddresses([address]), [empty])
137
+ self.assertEqual(Utils.getaddresses([address], strict=False),
138
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
139
+ self.assertEqual(Utils.parseaddr([address]), empty)
140
+ self.assertEqual(Utils.parseaddr([address], strict=False),
141
+ ('', address))
142
+
143
+ # Two addresses with quotes separated by comma.
144
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
145
+ self.assertEqual(Utils.getaddresses([address]),
146
+ [('Jane Doe', 'jane@example.net'),
147
+ ('John Doe', 'john@example.net')])
148
+ self.assertEqual(Utils.getaddresses([address], strict=False),
149
+ [('Jane Doe', 'jane@example.net'),
150
+ ('John Doe', 'john@example.net')])
151
+ self.assertEqual(Utils.parseaddr([address]), empty)
152
+ self.assertEqual(Utils.parseaddr([address], strict=False),
153
+ ('', address))
154
+
155
+ # Test Utils.supports_strict_parsing attribute
156
+ self.assertEqual(Utils.supports_strict_parsing, True)
157
+
158
+ def test_parsing_unicode_str(self):
159
+ email_in = "Honza Novák <honza@example.com>"
160
+ self.assertEqual(Utils.parseaddr("Honza str Novák <honza@example.com>"),
161
+ ('Honza str Nov\xc3\xa1k', 'honza@example.com'))
162
+ self.assertEqual(Utils.parseaddr(u"Honza unicode Novák <honza@example.com>"),
163
+ (u'Honza unicode Nov\xe1k', u'honza@example.com'))
164
+
165
def test_getaddresses_nasty(self):
166
- eq = self.assertEqual
167
- eq(Utils.getaddresses(['foo: ;']), [('', '')])
168
- eq(Utils.getaddresses(
169
- ['[]*-- =~$']),
170
- [('', ''), ('', ''), ('', '*--')])
171
- eq(Utils.getaddresses(
172
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
173
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
174
+ for addresses, expected in (
175
+ ([u'"Sürname, Firstname" <to@example.com>'],
176
+ [(u'Sürname, Firstname', 'to@example.com')]),
177
+
178
+ (['foo: ;'],
179
+ [('', '')]),
180
+
181
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
182
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
183
+
184
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
185
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
186
+
187
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
188
+ [('', '')]),
189
+
190
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
191
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
192
+
193
+ (['John Doe <jdoe@machine(comment). example>'],
194
+ [('John Doe (comment)', 'jdoe@machine.example')]),
195
+
196
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
197
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
198
+
199
+ (['Undisclosed recipients:;'],
200
+ [('', '')]),
201
+
202
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
203
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
204
+ ):
205
+ self.assertEqual(Utils.getaddresses(addresses),
206
+ expected)
207
+ self.assertEqual(Utils.getaddresses(addresses, strict=False),
208
+ expected)
209
+
210
+ addresses = ['[]*-- =~$']
211
+ self.assertEqual(Utils.getaddresses(addresses),
212
+ [('', '')])
213
+ self.assertEqual(Utils.getaddresses(addresses, strict=False),
214
+ [('', ''), ('', ''), ('', '*--')])
215
216
def test_getaddresses_embedded_comment(self):
217
"""Test proper handling of a nested comment"""
218
219
addrs = Utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
220
eq(addrs[0][1], 'foo@bar.com')
221
222
+ def test_iter_escaped_chars(self):
223
+ self.assertEqual(list(Utils._iter_escaped_chars(r'a\\b\"c\\"d')),
224
+ [(0, 'a'),
225
+ (2, '\\\\'),
226
+ (3, 'b'),
227
+ (5, '\\"'),
228
+ (6, 'c'),
229
+ (8, '\\\\'),
230
+ (9, '"'),
231
+ (10, 'd')])
232
+ self.assertEqual(list(Utils._iter_escaped_chars('a\\')),
233
+ [(0, 'a'), (1, '\\')])
234
+
235
+ def test_strip_quoted_realnames(self):
236
+ def check(addr, expected):
237
+ self.assertEqual(Utils._strip_quoted_realnames(addr), expected)
238
+
239
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
240
+ ' <jane@example.net>, <john@example.net>')
241
+ check(r'"Jane \"Doe\"." <jane@example.net>',
242
+ ' <jane@example.net>')
243
+
244
+ # special cases
245
+ check(r'before"name"after', 'beforeafter')
246
+ check(r'before"name"', 'before')
247
+ check(r'b"name"', 'b') # single char
248
+ check(r'"name"after', 'after')
249
+ check(r'"name"a', 'a') # single char
250
+ check(r'"name"', '')
251
+
252
+ # no change
253
+ for addr in (
254
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
255
+ 'lone " quote',
256
+ ):
257
+ self.assertEqual(Utils._strip_quoted_realnames(addr), addr)
258
+
259
+ def test_check_parenthesis(self):
260
+ addr = 'alice@example.net'
261
+ self.assertTrue(Utils._check_parenthesis('%s (Alice)' % addr))
262
+ self.assertFalse(Utils._check_parenthesis('%s )Alice(' % addr))
263
+ self.assertFalse(Utils._check_parenthesis('%s (Alice))' % addr))
264
+ self.assertFalse(Utils._check_parenthesis('%s ((Alice)' % addr))
265
+
266
+ # Ignore real name between quotes
267
+ self.assertTrue(Utils._check_parenthesis('")Alice((" %s' % addr))
268
+
269
+
270
def test_make_msgid_collisions(self):
271
# Test make_msgid uniqueness, even with multiple threads
272
class MsgidsThread(Thread):
273
--- a/Lib/email/test/test_email_renamed.py
274
+++ b/Lib/email/test/test_email_renamed.py
275
276
+# -*- coding: utf-8 -*-
277
# Copyright (C) 2001-2007 Python Software Foundation
278
# Contact: email-sig@python.org
279
# email package unit tests
280
281
('Bud Person', 'bperson@dom.ain')])
282
283
def test_getaddresses_nasty(self):
284
- eq = self.assertEqual
285
- eq(utils.getaddresses(['foo: ;']), [('', '')])
286
- eq(utils.getaddresses(
287
- ['[]*-- =~$']),
288
- [('', ''), ('', ''), ('', '*--')])
289
- eq(utils.getaddresses(
290
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
291
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
292
+ for addresses, expected in (
293
+ ([u'"Sürname, Firstname" <to@example.com>'],
294
+ [(u'Sürname, Firstname', 'to@example.com')]),
295
+
296
+ (['foo: ;'],
297
+ [('', '')]),
298
+
299
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
300
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
301
+
302
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
303
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
304
+
305
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
306
+ [('', '')]),
307
+
308
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
309
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
310
+
311
+ (['John Doe <jdoe@machine(comment). example>'],
312
+ [('John Doe (comment)', 'jdoe@machine.example')]),
313
+
314
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
315
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
316
+
317
+ (['Undisclosed recipients:;'],
318
+ [('', '')]),
319
+
320
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
321
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
322
+ ):
323
+ self.assertEqual(utils.getaddresses(addresses),
324
+ expected)
325
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
326
+ expected)
327
+
328
+ addresses = ['[]*-- =~$']
329
+ self.assertEqual(utils.getaddresses(addresses),
330
+ [('', '')])
331
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
332
+ [('', ''), ('', ''), ('', '*--')])
333
334
def test_getaddresses_embedded_comment(self):
335
"""Test proper handling of a nested comment"""
336
--- a/Lib/email/utils.py
337
+++ b/Lib/email/utils.py
338
339
return address
340
341
342
-
343
-def getaddresses(fieldvalues):
344
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
345
- all = COMMASPACE.join(fieldvalues)
346
- a = _AddressList(all)
347
- return a.addresslist
348
+def _iter_escaped_chars(addr):
349
+ pos = 0
350
+ escape = False
351
+ for pos, ch in enumerate(addr):
352
+ if escape:
353
+ yield (pos, '\\' + ch)
354
+ escape = False
355
+ elif ch == '\\':
356
+ escape = True
357
+ else:
358
+ yield (pos, ch)
359
+ if escape:
360
+ yield (pos, '\\')
361
+
362
+
363
+def _strip_quoted_realnames(addr):
364
+ """Strip real names between quotes."""
365
+ if '"' not in addr:
366
+ # Fast path
367
+ return addr
368
+
369
+ start = 0
370
+ open_pos = None
371
+ result = []
372
+ for pos, ch in _iter_escaped_chars(addr):
373
+ if ch == '"':
374
+ if open_pos is None:
375
+ open_pos = pos
376
+ else:
377
+ if start != open_pos:
378
+ result.append(addr[start:open_pos])
379
+ start = pos + 1
380
+ open_pos = None
381
+
382
+ if start < len(addr):
383
+ result.append(addr[start:])
384
+
385
+ return ''.join(result)
386
+
387
+
388
+supports_strict_parsing = True
389
+
390
+def getaddresses(fieldvalues, strict=True):
391
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
392
+
393
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
394
+ its place.
395
+
396
+ If strict is true, use a strict parser which rejects malformed inputs.
397
+ """
398
+
399
+ # If strict is true and the number of parsed addresses is not equal to the
400
+ # number of addresses expected from the input fieldvalues, a parsing error
401
+ # has occurred and consequently a list containing a single empty 2-tuple
402
+ # [('', '')] is returned in its place. This is done to avoid invalid output.
403
+ #
404
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
405
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
406
+ # Safe output: [('', '')]
407
+
408
+ if not strict:
409
+ all = COMMASPACE.join(unicode(v) for v in fieldvalues)
410
+ a = _AddressList(all)
411
+ return a.addresslist
412
+
413
+ fieldvalues = [unicode(v) for v in fieldvalues]
414
+ fieldvalues = _pre_parse_validation(fieldvalues)
415
+ addr = COMMASPACE.join(fieldvalues)
416
+ a = _AddressList(addr)
417
+ result = _post_parse_validation(a.addresslist)
418
+
419
+ # Treat output as invalid if the number of addresses is not equal to the
420
+ # expected number of addresses.
421
+ n = 0
422
+ for v in fieldvalues:
423
+ # When a comma is used in the Real Name part it is not a delimiter.
424
+ # So strip those out before counting the commas.
425
+ v = _strip_quoted_realnames(v)
426
+ # Expected number of addresses: 1 + number of commas
427
+ n += 1 + v.count(',')
428
+ if len(result) != n:
429
+ return [('', '')]
430
+
431
+ return result
432
+
433
434
435
-
436
ecre = re.compile(r'''
437
=\? # literal =?
438
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
439
440
return _parsedate_tz(data)
441
442
443
-def parseaddr(addr):
444
+def parseaddr(addr, strict=True):
445
"""
446
Parse addr into its constituent realname and email address parts.
447
448
Return a tuple of realname and email address, unless the parse fails, in
449
which case return a 2-tuple of ('', '').
450
+
451
+ If strict is True, use a strict parser which rejects malformed inputs.
452
"""
453
- addrs = _AddressList(addr).addresslist
454
- if not addrs:
455
- return '', ''
456
+
457
+ if not strict:
458
+ addrs = _AddressList(addr).addresslist
459
+ if not addrs:
460
+ return ('', '')
461
+ return addrs[0]
462
+
463
+ if isinstance(addr, list):
464
+ addr = addr[0]
465
+
466
+ if not isinstance(addr, basestring):
467
+ return ('', '')
468
+
469
+ addr = _pre_parse_validation([addr])[0]
470
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
471
+
472
+ if not addrs or len(addrs) > 1:
473
+ return ('', '')
474
+
475
return addrs[0]
476
477
478
+def _check_parenthesis(addr):
479
+ # Ignore parenthesis in quoted real names.
480
+ addr = _strip_quoted_realnames(addr)
481
+
482
+ opens = 0
483
+ for pos, ch in _iter_escaped_chars(addr):
484
+ if ch == '(':
485
+ opens += 1
486
+ elif ch == ')':
487
+ opens -= 1
488
+ if opens < 0:
489
+ return False
490
+ return (opens == 0)
491
+
492
+
493
+def _pre_parse_validation(email_header_fields):
494
+ accepted_values = []
495
+ for v in email_header_fields:
496
+ if not _check_parenthesis(v):
497
+ v = "('', '')"
498
+ accepted_values.append(v)
499
+
500
+ return accepted_values
501
+
502
+
503
+def _post_parse_validation(parsed_email_header_tuples):
504
+ accepted_values = []
505
+ # The parser would have parsed a correctly formatted domain-literal
506
+ # The existence of an [ after parsing indicates a parsing failure
507
+ for v in parsed_email_header_tuples:
508
+ if '[' in v[1]:
509
+ v = ('', '')
510
+ accepted_values.append(v)
511
+
512
+ return accepted_values
513
+
514
+
515
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
516
def unquote(str):
517
"""Remove quotes from a string."""
518
--- /dev/null
519
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
520
521
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
522
+return ``('', '')`` 2-tuples in more situations where invalid email
523
+addresses are encountered instead of potentially inaccurate values. Add
524
+optional *strict* parameter to these two functions: use ``strict=False`` to
525
+get the old behavior, which accepts malformed inputs.
526
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
527
+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
528
+Stinner to improve the CVE-2023-27043 fix.
529