Discussion:
[Python-checkins] cpython (2.7): Issue #13664: GzipFile now supports non-ascii Unicode filenames.
serhiy.storchaka
2014-10-12 19:25:47 UTC
Permalink
https://hg.python.org/cpython/rev/272c78c9c47e
changeset: 93009:272c78c9c47e
branch: 2.7
parent: 93006:ff59b0f9e142
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sun Oct 12 22:23:28 2014 +0300
summary:
Issue #13664: GzipFile now supports non-ascii Unicode filenames.

files:
Lib/gzip.py | 13 ++++++++++---
Lib/test/test_gzip.py | 11 +++++++++++
Misc/NEWS | 2 ++
3 files changed, 23 insertions(+), 3 deletions(-)


diff --git a/Lib/gzip.py b/Lib/gzip.py
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -164,9 +164,16 @@
def _write_gzip_header(self):
self.fileobj.write('\037\213') # magic header
self.fileobj.write('\010') # compression method
- fname = os.path.basename(self.name)
- if fname.endswith(".gz"):
- fname = fname[:-3]
+ try:
+ # RFC 1952 requires the FNAME field to be Latin-1. Do not
+ # include filenames that cannot be represented that way.
+ fname = os.path.basename(self.name)
+ if not isinstance(fname, str):
+ fname = fname.encode('latin-1')
+ if fname.endswith('.gz'):
+ fname = fname[:-3]
+ except UnicodeEncodeError:
+ fname = ''
flags = 0
if fname:
flags = FNAME
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -30,6 +30,17 @@
def tearDown(self):
test_support.unlink(self.filename)

+ @test_support.requires_unicode
+ def test_unicode_filename(self):
+ unicode_filename = test_support.TESTFN_UNICODE
+ with gzip.GzipFile(unicode_filename, "wb") as f:
+ f.write(data1 * 50)
+ with gzip.GzipFile(unicode_filename, "rb") as f:
+ self.assertEqual(f.read(), data1 * 50)
+ # Sanity check that we are actually operating on the right file.
+ with open(unicode_filename, 'rb') as fobj, \
+ gzip.GzipFile(fileobj=fobj, mode="rb") as f:
+ self.assertEqual(f.read(), data1 * 50)

def test_write(self):
with gzip.GzipFile(self.filename, 'wb') as f:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -37,6 +37,8 @@
Library
-------

+- Issue #13664: GzipFile now supports non-ascii Unicode filenames.
+
- Issue #13096: Fixed segfault in CTypes POINTER handling of large
values.
--
Repository URL: https://hg.python.org/cpython