1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | Lib/test/test_unicode_file.py
# Test some Unicode file name semantics.
# We don't test many operations on files other than
# that their names can be used with Unicode characters.
#
# NOTE: this module is Python 2 code - it relies on ``unicode``,
# ``os.getcwdu`` and ``test.test_support``.
import glob
import os
import shutil
import sys
import time
import unicodedata
import unittest

from test.test_support import run_unittest, change_cwd, TESTFN_UNICODE
from test.test_support import TESTFN_ENCODING, TESTFN_UNENCODABLE

try:
    TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
except (UnicodeError, TypeError):
    # Either the file system encoding is None, or the file name
    # cannot be encoded in the file system encoding.
    raise unittest.SkipTest("No Unicode filesystem semantics on this platform.")

if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    # The file system encoding does not support Latin-1
    # (which test_support assumes), so try the file system
    # encoding instead.
    try:
        TESTFN_UNICODE = unicode("@test-\xe0\xf2", sys.getfilesystemencoding())
        TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
        if '?' in TESTFN_ENCODED:
            # MBCS will not report the error properly
            raise UnicodeError("mbcs encoding problem")
    except (UnicodeError, TypeError):
        raise unittest.SkipTest("Cannot find a suitable filename.")

# Final sanity check: whichever name we ended up with must survive an
# encode/decode round trip, otherwise none of the tests below are meaningful.
if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    raise unittest.SkipTest("Cannot find a suitable filename.")


def remove_if_exists(filename):
    """Delete *filename* if it currently exists; do nothing otherwise."""
    if os.path.exists(filename):
        os.unlink(filename)


class TestUnicodeFiles(unittest.TestCase):
    """Check that file APIs accept encoded and Unicode file names alike."""

    # The '_do_' functions are the actual tests.  They generally assume the
    # file already exists etc.

    def _do_single(self, filename):
        """Do all the tests we can given only a single filename.

        The file named *filename* should already exist.
        """
        self.assertTrue(os.path.exists(filename))
        self.assertTrue(os.path.isfile(filename))
        self.assertTrue(os.access(filename, os.R_OK))
        self.assertTrue(os.path.exists(os.path.abspath(filename)))
        self.assertTrue(os.path.isfile(os.path.abspath(filename)))
        self.assertTrue(os.access(os.path.abspath(filename), os.R_OK))
        os.chmod(filename, 0o777)
        os.utime(filename, None)
        os.utime(filename, (time.time(), time.time()))
        # Copy/rename etc tests using the same filename
        self._do_copyish(filename, filename)
        # Filename should appear in glob output
        self.assertEqual(os.path.abspath(filename),
                         os.path.abspath(glob.glob(filename)[0]))
        # basename should appear in listdir.
        path, base = os.path.split(os.path.abspath(filename))
        if isinstance(base, str):
            base = base.decode(TESTFN_ENCODING)
        file_list = os.listdir(path)
        # listdir() with a unicode arg may or may not return Unicode
        # objects, depending on the platform.
        if file_list and isinstance(file_list[0], str):
            file_list = [f.decode(TESTFN_ENCODING) for f in file_list]

        # Normalize the unicode strings, as round-tripping the name via the OS
        # may return a different (but equivalent) value.
        base = unicodedata.normalize("NFD", base)
        file_list = [unicodedata.normalize("NFD", f) for f in file_list]

        self.assertIn(base, file_list)

    def _do_equivalent(self, filename1, filename2):
        """Do as many "equivalency" tests as we can.

        Checks that although the two filenames have different types, they
        refer to the same file.
        """
        # Note we only check "filename1 against filename2" - we don't bother
        # checking "filename2 against 1", as we assume we are called again with
        # the args reversed.
        self.assertNotEqual(
            type(filename1), type(filename2),
            "No point checking equivalent filenames of the same type")
        # stat and lstat should return the same results.
        self.assertEqual(os.stat(filename1), os.stat(filename2))
        self.assertEqual(os.lstat(filename1), os.lstat(filename2))
        # Copy/rename etc tests using equivalent filename
        self._do_copyish(filename1, filename2)

    def _do_copyish(self, filename1, filename2):
        """Test operations that copy, move, etc. one file to another.

        The two names are deliberately interleaved: a file created through
        one name is checked or removed through the other.  *filename1* must
        exist on entry.
        """
        # Should be able to rename the file using either name.
        self.assertTrue(os.path.isfile(filename1))  # must exist.
        os.rename(filename1, filename2 + ".new")
        self.assertTrue(os.path.isfile(filename1 + ".new"))
        os.rename(filename1 + ".new", filename2)
        self.assertTrue(os.path.isfile(filename2))

        shutil.copy(filename1, filename2 + ".new")
        os.unlink(filename1 + ".new")  # remove using equiv name.
        # And a couple of moves, one using each name.
        shutil.move(filename1, filename2 + ".new")
        self.assertFalse(os.path.exists(filename2))
        shutil.move(filename1 + ".new", filename2)
        self.assertTrue(os.path.exists(filename1))
        # Note - due to the implementation of shutil.move,
        # it tries a rename first.  This only fails on Windows when on
        # different file systems - and this test can't ensure that.
        # So we test the shutil.copy2 function, which is the thing most
        # likely to fail.
        shutil.copy2(filename1, filename2 + ".new")
        os.unlink(filename1 + ".new")

    def _do_directory(self, make_name, chdir_name, encoded):
        """Create a directory under one name and chdir into it via another.

        make_name  -- name used to create (and finally remove) the directory.
        chdir_name -- name used to change into the directory.
        encoded    -- true if *make_name* is an encoded byte string, in which
                      case the cwd is read with os.getcwd() and both values
                      are decoded before comparison; otherwise os.getcwdu()
                      is used and *make_name* is compared as-is.
        """
        if os.path.isdir(make_name):
            os.rmdir(make_name)
        os.mkdir(make_name)
        try:
            with change_cwd(chdir_name):
                if not encoded:
                    cwd_result = os.getcwdu()
                    name_result = make_name
                else:
                    cwd_result = os.getcwd().decode(TESTFN_ENCODING)
                    name_result = make_name.decode(TESTFN_ENCODING)

                # Normalize both sides so that equivalent spellings of the
                # same name compare equal.
                cwd_result = unicodedata.normalize("NFD", cwd_result)
                name_result = unicodedata.normalize("NFD", name_result)

                self.assertEqual(os.path.basename(cwd_result), name_result)
        finally:
            os.rmdir(make_name)

    # The '_test' functions are 'entry points with params' - ie, what the
    # top-level 'test' functions would be if they could take params

    def _test_single(self, filename):
        """Run _do_single on a file created with open(), then with os.open()."""
        remove_if_exists(filename)
        with open(filename, "w"):
            pass
        try:
            self._do_single(filename)
        finally:
            os.unlink(filename)
        self.assertFalse(os.path.exists(filename))
        # and again with os.open.
        fd = os.open(filename, os.O_CREAT)
        os.close(fd)
        try:
            self._do_single(filename)
        finally:
            os.unlink(filename)

    def _test_equivalent(self, filename1, filename2):
        """Create a file as *filename1* and run _do_equivalent on the pair."""
        remove_if_exists(filename1)
        self.assertFalse(os.path.exists(filename2))
        with open(filename1, "w"):
            pass
        try:
            self._do_equivalent(filename1, filename2)
        finally:
            os.unlink(filename1)

    # The 'test' functions are unittest entry points, and simply call our
    # _test functions with each of the filename combinations we wish to test.
    # (They are documented with comments rather than docstrings so that
    # unittest keeps reporting them by method name.)

    def test_single_files(self):
        self._test_single(TESTFN_ENCODED)
        self._test_single(TESTFN_UNICODE)
        if TESTFN_UNENCODABLE is not None:
            self._test_single(TESTFN_UNENCODABLE)

    def test_equivalent_files(self):
        self._test_equivalent(TESTFN_ENCODED, TESTFN_UNICODE)
        self._test_equivalent(TESTFN_UNICODE, TESTFN_ENCODED)

    def test_directories(self):
        # For all 'equivalent' combinations:
        #  Make dir with encoded, chdir with unicode, checkdir with encoded
        #  (or unicode/encoded/unicode, etc.)
        ext = ".dir"
        self._do_directory(TESTFN_ENCODED + ext, TESTFN_ENCODED + ext, True)
        self._do_directory(TESTFN_ENCODED + ext, TESTFN_UNICODE + ext, True)
        self._do_directory(TESTFN_UNICODE + ext, TESTFN_ENCODED + ext, False)
        self._do_directory(TESTFN_UNICODE + ext, TESTFN_UNICODE + ext, False)
        # Our directory name that can't use a non-unicode name.
        if TESTFN_UNENCODABLE is not None:
            self._do_directory(TESTFN_UNENCODABLE + ext,
                               TESTFN_UNENCODABLE + ext,
                               False)


def test_main():
    """Run every test case defined in this module."""
    run_unittest(__name__)


if __name__ == "__main__":
    test_main()