You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1407 lines
37 KiB

13 years ago
  1. #include "Python.h"
  2. #include "osdefs.h"
  3. #include <locale.h>
  4. #ifdef MS_WINDOWS
  5. # include <malloc.h>
  6. # include <windows.h>
  7. #endif
  8. #ifdef HAVE_LANGINFO_H
  9. #include <langinfo.h>
  10. #endif
  11. #ifdef HAVE_SYS_IOCTL_H
  12. #include <sys/ioctl.h>
  13. #endif
  14. #ifdef HAVE_FCNTL_H
  15. #include <fcntl.h>
  16. #endif /* HAVE_FCNTL_H */
  17. #ifdef __APPLE__
  18. extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
  19. #endif
  20. #ifdef O_CLOEXEC
  21. /* Does open() support the O_CLOEXEC flag? Possible values:
  22. -1: unknown
  23. 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
  24. 1: open() supports O_CLOEXEC flag, close-on-exec is set
  25. The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
  26. and os.open(). */
  27. int _Py_open_cloexec_works = -1;
  28. #endif
  29. PyObject *
  30. _Py_device_encoding(int fd)
  31. {
  32. #if defined(MS_WINDOWS)
  33. UINT cp;
  34. #endif
  35. if (!_PyVerify_fd(fd) || !isatty(fd)) {
  36. Py_RETURN_NONE;
  37. }
  38. #if defined(MS_WINDOWS)
  39. if (fd == 0)
  40. cp = GetConsoleCP();
  41. else if (fd == 1 || fd == 2)
  42. cp = GetConsoleOutputCP();
  43. else
  44. cp = 0;
  45. /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
  46. has no console */
  47. if (cp != 0)
  48. return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
  49. #elif defined(CODESET)
  50. {
  51. char *codeset = nl_langinfo(CODESET);
  52. if (codeset != NULL && codeset[0] != 0)
  53. return PyUnicode_FromString(codeset);
  54. }
  55. #endif
  56. Py_RETURN_NONE;
  57. }
  58. #if !defined(__APPLE__) && !defined(MS_WINDOWS)
  59. extern int _Py_normalize_encoding(const char *, char *, size_t);
  60. /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
  61. On these operating systems, nl_langinfo(CODESET) announces an alias of the
  62. ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
  63. ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
  64. locale.getpreferredencoding() codec. For example, if command line arguments
  65. are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
  66. UnicodeEncodeError instead of retrieving the original byte string.
  67. The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
  68. nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
  69. one byte in range 0x80-0xff can be decoded from the locale encoding. The
  70. workaround is also enabled on error, for example if getting the locale
  71. failed.
  72. Values of force_ascii:
  73. 1: the workaround is used: Py_EncodeLocale() uses
  74. encode_ascii_surrogateescape() and Py_DecodeLocale() uses
  75. decode_ascii_surrogateescape()
  76. 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
  77. Py_DecodeLocale() uses mbstowcs()
  78. -1: unknown, need to call check_force_ascii() to get the value
  79. */
  80. static int force_ascii = -1;
  81. static int
  82. check_force_ascii(void)
  83. {
  84. char *loc;
  85. #if defined(HAVE_LANGINFO_H) && defined(CODESET)
  86. char *codeset, **alias;
  87. char encoding[100];
  88. int is_ascii;
  89. unsigned int i;
  90. char* ascii_aliases[] = {
  91. "ascii",
  92. "646",
  93. "ansi-x3.4-1968",
  94. "ansi-x3-4-1968",
  95. "ansi-x3.4-1986",
  96. "cp367",
  97. "csascii",
  98. "ibm367",
  99. "iso646-us",
  100. "iso-646.irv-1991",
  101. "iso-ir-6",
  102. "us",
  103. "us-ascii",
  104. NULL
  105. };
  106. #endif
  107. loc = setlocale(LC_CTYPE, NULL);
  108. if (loc == NULL)
  109. goto error;
  110. if (strcmp(loc, "C") != 0) {
  111. /* the LC_CTYPE locale is different than C */
  112. return 0;
  113. }
  114. #if defined(HAVE_LANGINFO_H) && defined(CODESET)
  115. codeset = nl_langinfo(CODESET);
  116. if (!codeset || codeset[0] == '\0') {
  117. /* CODESET is not set or empty */
  118. goto error;
  119. }
  120. if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
  121. goto error;
  122. is_ascii = 0;
  123. for (alias=ascii_aliases; *alias != NULL; alias++) {
  124. if (strcmp(encoding, *alias) == 0) {
  125. is_ascii = 1;
  126. break;
  127. }
  128. }
  129. if (!is_ascii) {
  130. /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
  131. return 0;
  132. }
  133. for (i=0x80; i<0xff; i++) {
  134. unsigned char ch;
  135. wchar_t wch;
  136. size_t res;
  137. ch = (unsigned char)i;
  138. res = mbstowcs(&wch, (char*)&ch, 1);
  139. if (res != (size_t)-1) {
  140. /* decoding a non-ASCII character from the locale encoding succeed:
  141. the locale encoding is not ASCII, force ASCII */
  142. return 1;
  143. }
  144. }
  145. /* None of the bytes in the range 0x80-0xff can be decoded from the locale
  146. encoding: the locale encoding is really ASCII */
  147. return 0;
  148. #else
  149. /* nl_langinfo(CODESET) is not available: always force ASCII */
  150. return 1;
  151. #endif
  152. error:
  153. /* if an error occured, force the ASCII encoding */
  154. return 1;
  155. }
  156. static char*
  157. encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
  158. {
  159. char *result = NULL, *out;
  160. size_t len, i;
  161. wchar_t ch;
  162. if (error_pos != NULL)
  163. *error_pos = (size_t)-1;
  164. len = wcslen(text);
  165. result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
  166. if (result == NULL)
  167. return NULL;
  168. out = result;
  169. for (i=0; i<len; i++) {
  170. ch = text[i];
  171. if (ch <= 0x7f) {
  172. /* ASCII character */
  173. *out++ = (char)ch;
  174. }
  175. else if (0xdc80 <= ch && ch <= 0xdcff) {
  176. /* UTF-8b surrogate */
  177. *out++ = (char)(ch - 0xdc00);
  178. }
  179. else {
  180. if (error_pos != NULL)
  181. *error_pos = i;
  182. PyMem_Free(result);
  183. return NULL;
  184. }
  185. }
  186. *out = '\0';
  187. return result;
  188. }
  189. #endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
  190. #if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
  191. static wchar_t*
  192. decode_ascii_surrogateescape(const char *arg, size_t *size)
  193. {
  194. wchar_t *res;
  195. unsigned char *in;
  196. wchar_t *out;
  197. size_t argsize = strlen(arg) + 1;
  198. if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
  199. return NULL;
  200. res = PyMem_RawMalloc(argsize*sizeof(wchar_t));
  201. if (!res)
  202. return NULL;
  203. in = (unsigned char*)arg;
  204. out = res;
  205. while(*in)
  206. if(*in < 128)
  207. *out++ = *in++;
  208. else
  209. *out++ = 0xdc00 + *in++;
  210. *out = 0;
  211. if (size != NULL)
  212. *size = out - res;
  213. return res;
  214. }
  215. #endif
  216. /* Decode a byte string from the locale encoding with the
  217. surrogateescape error handler: undecodable bytes are decoded as characters
  218. in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
  219. character, escape the bytes using the surrogateescape error handler instead
  220. of decoding them.
  221. Return a pointer to a newly allocated wide character string, use
  222. PyMem_RawFree() to free the memory. If size is not NULL, write the number of
  223. wide characters excluding the null character into *size
  224. Return NULL on decoding error or memory allocation error. If *size* is not
  225. NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
  226. decoding error.
  227. Decoding errors should never happen, unless there is a bug in the C
  228. library.
  229. Use the Py_EncodeLocale() function to encode the character string back to a
  230. byte string. */
  231. wchar_t*
  232. Py_DecodeLocale(const char* arg, size_t *size)
  233. {
  234. #ifdef __APPLE__
  235. wchar_t *wstr;
  236. wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
  237. if (size != NULL) {
  238. if (wstr != NULL)
  239. *size = wcslen(wstr);
  240. else
  241. *size = (size_t)-1;
  242. }
  243. return wstr;
  244. #else
  245. wchar_t *res;
  246. size_t argsize;
  247. size_t count;
  248. #ifdef HAVE_MBRTOWC
  249. unsigned char *in;
  250. wchar_t *out;
  251. mbstate_t mbs;
  252. #endif
  253. #ifndef MS_WINDOWS
  254. if (force_ascii == -1)
  255. force_ascii = check_force_ascii();
  256. if (force_ascii) {
  257. /* force ASCII encoding to workaround mbstowcs() issue */
  258. res = decode_ascii_surrogateescape(arg, size);
  259. if (res == NULL)
  260. goto oom;
  261. return res;
  262. }
  263. #endif
  264. #ifdef HAVE_BROKEN_MBSTOWCS
  265. /* Some platforms have a broken implementation of
  266. * mbstowcs which does not count the characters that
  267. * would result from conversion. Use an upper bound.
  268. */
  269. argsize = strlen(arg);
  270. #else
  271. argsize = mbstowcs(NULL, arg, 0);
  272. #endif
  273. if (argsize != (size_t)-1) {
  274. if (argsize == PY_SSIZE_T_MAX)
  275. goto oom;
  276. argsize += 1;
  277. if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
  278. goto oom;
  279. res = (wchar_t *)PyMem_RawMalloc(argsize*sizeof(wchar_t));
  280. if (!res)
  281. goto oom;
  282. count = mbstowcs(res, arg, argsize);
  283. if (count != (size_t)-1) {
  284. wchar_t *tmp;
  285. /* Only use the result if it contains no
  286. surrogate characters. */
  287. for (tmp = res; *tmp != 0 &&
  288. !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
  289. ;
  290. if (*tmp == 0) {
  291. if (size != NULL)
  292. *size = count;
  293. return res;
  294. }
  295. }
  296. PyMem_RawFree(res);
  297. }
  298. /* Conversion failed. Fall back to escaping with surrogateescape. */
  299. #ifdef HAVE_MBRTOWC
  300. /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
  301. /* Overallocate; as multi-byte characters are in the argument, the
  302. actual output could use less memory. */
  303. argsize = strlen(arg) + 1;
  304. if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
  305. goto oom;
  306. res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
  307. if (!res)
  308. goto oom;
  309. in = (unsigned char*)arg;
  310. out = res;
  311. memset(&mbs, 0, sizeof mbs);
  312. while (argsize) {
  313. size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
  314. if (converted == 0)
  315. /* Reached end of string; null char stored. */
  316. break;
  317. if (converted == (size_t)-2) {
  318. /* Incomplete character. This should never happen,
  319. since we provide everything that we have -
  320. unless there is a bug in the C library, or I
  321. misunderstood how mbrtowc works. */
  322. PyMem_RawFree(res);
  323. if (size != NULL)
  324. *size = (size_t)-2;
  325. return NULL;
  326. }
  327. if (converted == (size_t)-1) {
  328. /* Conversion error. Escape as UTF-8b, and start over
  329. in the initial shift state. */
  330. *out++ = 0xdc00 + *in++;
  331. argsize--;
  332. memset(&mbs, 0, sizeof mbs);
  333. continue;
  334. }
  335. if (Py_UNICODE_IS_SURROGATE(*out)) {
  336. /* Surrogate character. Escape the original
  337. byte sequence with surrogateescape. */
  338. argsize -= converted;
  339. while (converted--)
  340. *out++ = 0xdc00 + *in++;
  341. continue;
  342. }
  343. /* successfully converted some bytes */
  344. in += converted;
  345. argsize -= converted;
  346. out++;
  347. }
  348. if (size != NULL)
  349. *size = out - res;
  350. #else /* HAVE_MBRTOWC */
  351. /* Cannot use C locale for escaping; manually escape as if charset
  352. is ASCII (i.e. escape all bytes > 128. This will still roundtrip
  353. correctly in the locale's charset, which must be an ASCII superset. */
  354. res = decode_ascii_surrogateescape(arg, size);
  355. if (res == NULL)
  356. goto oom;
  357. #endif /* HAVE_MBRTOWC */
  358. return res;
  359. oom:
  360. if (size != NULL)
  361. *size = (size_t)-1;
  362. return NULL;
  363. #endif /* __APPLE__ */
  364. }
  365. /* Encode a wide character string to the locale encoding with the
  366. surrogateescape error handler: surrogate characters in the range
  367. U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
  368. Return a pointer to a newly allocated byte string, use PyMem_Free() to free
  369. the memory. Return NULL on encoding or memory allocation error.
  370. If error_pos is not NULL, *error_pos is set to the index of the invalid
  371. character on encoding error, or set to (size_t)-1 otherwise.
  372. Use the Py_DecodeLocale() function to decode the bytes string back to a wide
  373. character string. */
  374. char*
  375. Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
  376. {
  377. #ifdef __APPLE__
  378. Py_ssize_t len;
  379. PyObject *unicode, *bytes = NULL;
  380. char *cpath;
  381. unicode = PyUnicode_FromWideChar(text, wcslen(text));
  382. if (unicode == NULL)
  383. return NULL;
  384. bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
  385. Py_DECREF(unicode);
  386. if (bytes == NULL) {
  387. PyErr_Clear();
  388. if (error_pos != NULL)
  389. *error_pos = (size_t)-1;
  390. return NULL;
  391. }
  392. len = PyBytes_GET_SIZE(bytes);
  393. cpath = PyMem_Malloc(len+1);
  394. if (cpath == NULL) {
  395. PyErr_Clear();
  396. Py_DECREF(bytes);
  397. if (error_pos != NULL)
  398. *error_pos = (size_t)-1;
  399. return NULL;
  400. }
  401. memcpy(cpath, PyBytes_AsString(bytes), len + 1);
  402. Py_DECREF(bytes);
  403. return cpath;
  404. #else /* __APPLE__ */
  405. const size_t len = wcslen(text);
  406. char *result = NULL, *bytes = NULL;
  407. size_t i, size, converted;
  408. wchar_t c, buf[2];
  409. #ifndef MS_WINDOWS
  410. if (force_ascii == -1)
  411. force_ascii = check_force_ascii();
  412. if (force_ascii)
  413. return encode_ascii_surrogateescape(text, error_pos);
  414. #endif
  415. /* The function works in two steps:
  416. 1. compute the length of the output buffer in bytes (size)
  417. 2. outputs the bytes */
  418. size = 0;
  419. buf[1] = 0;
  420. while (1) {
  421. for (i=0; i < len; i++) {
  422. c = text[i];
  423. if (c >= 0xdc80 && c <= 0xdcff) {
  424. /* UTF-8b surrogate */
  425. if (bytes != NULL) {
  426. *bytes++ = c - 0xdc00;
  427. size--;
  428. }
  429. else
  430. size++;
  431. continue;
  432. }
  433. else {
  434. buf[0] = c;
  435. if (bytes != NULL)
  436. converted = wcstombs(bytes, buf, size);
  437. else
  438. converted = wcstombs(NULL, buf, 0);
  439. if (converted == (size_t)-1) {
  440. if (result != NULL)
  441. PyMem_Free(result);
  442. if (error_pos != NULL)
  443. *error_pos = i;
  444. return NULL;
  445. }
  446. if (bytes != NULL) {
  447. bytes += converted;
  448. size -= converted;
  449. }
  450. else
  451. size += converted;
  452. }
  453. }
  454. if (result != NULL) {
  455. *bytes = '\0';
  456. break;
  457. }
  458. size += 1; /* nul byte at the end */
  459. result = PyMem_Malloc(size);
  460. if (result == NULL) {
  461. if (error_pos != NULL)
  462. *error_pos = (size_t)-1;
  463. return NULL;
  464. }
  465. bytes = result;
  466. }
  467. return result;
  468. #endif /* __APPLE__ */
  469. }
  470. /* In principle, this should use HAVE__WSTAT, and _wstat
  471. should be detected by autoconf. However, no current
  472. POSIX system provides that function, so testing for
  473. it is pointless.
  474. Not sure whether the MS_WINDOWS guards are necessary:
  475. perhaps for cygwin/mingw builds?
  476. */
  477. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  478. /* Get file status. Encode the path to the locale encoding. */
  479. int
  480. _Py_wstat(const wchar_t* path, struct stat *buf)
  481. {
  482. int err;
  483. char *fname;
  484. fname = Py_EncodeLocale(path, NULL);
  485. if (fname == NULL) {
  486. errno = EINVAL;
  487. return -1;
  488. }
  489. err = stat(fname, buf);
  490. PyMem_Free(fname);
  491. return err;
  492. }
  493. #endif
  494. #if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
  495. #ifdef MS_WINDOWS
  496. static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
  497. static void
  498. FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
  499. {
  500. /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
  501. /* Cannot simply cast and dereference in_ptr,
  502. since it might not be aligned properly */
  503. __int64 in;
  504. memcpy(&in, in_ptr, sizeof(in));
  505. *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
  506. *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
  507. }
  508. void
  509. _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
  510. {
  511. /* XXX endianness */
  512. __int64 out;
  513. out = time_in + secs_between_epochs;
  514. out = out * 10000000 + nsec_in / 100;
  515. memcpy(out_ptr, &out, sizeof(out));
  516. }
  517. /* Below, we *know* that ugo+r is 0444 */
  518. #if _S_IREAD != 0400
  519. #error Unsupported C library
  520. #endif
  521. static int
  522. attributes_to_mode(DWORD attr)
  523. {
  524. int m = 0;
  525. if (attr & FILE_ATTRIBUTE_DIRECTORY)
  526. m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
  527. else
  528. m |= _S_IFREG;
  529. if (attr & FILE_ATTRIBUTE_READONLY)
  530. m |= 0444;
  531. else
  532. m |= 0666;
  533. return m;
  534. }
  535. void
  536. _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result)
  537. {
  538. memset(result, 0, sizeof(*result));
  539. result->st_mode = attributes_to_mode(info->dwFileAttributes);
  540. result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
  541. result->st_dev = info->dwVolumeSerialNumber;
  542. result->st_rdev = result->st_dev;
  543. FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
  544. FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
  545. FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
  546. result->st_nlink = info->nNumberOfLinks;
  547. result->st_ino = (((__int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow;
  548. if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
  549. /* first clear the S_IFMT bits */
  550. result->st_mode ^= (result->st_mode & S_IFMT);
  551. /* now set the bits that make this a symlink */
  552. result->st_mode |= S_IFLNK;
  553. }
  554. result->st_file_attributes = info->dwFileAttributes;
  555. }
  556. #endif
  557. /* Return information about a file.
  558. On POSIX, use fstat().
  559. On Windows, use GetFileType() and GetFileInformationByHandle() which support
  560. files larger than 2 GB. fstat() may fail with EOVERFLOW on files larger
  561. than 2 GB because the file size type is an signed 32-bit integer: see issue
  562. #23152.
  563. */
  564. int
  565. _Py_fstat(int fd, struct _Py_stat_struct *result)
  566. {
  567. #ifdef MS_WINDOWS
  568. BY_HANDLE_FILE_INFORMATION info;
  569. HANDLE h;
  570. int type;
  571. if (!_PyVerify_fd(fd))
  572. h = INVALID_HANDLE_VALUE;
  573. else
  574. h = (HANDLE)_get_osfhandle(fd);
  575. /* Protocol violation: we explicitly clear errno, instead of
  576. setting it to a POSIX error. Callers should use GetLastError. */
  577. errno = 0;
  578. if (h == INVALID_HANDLE_VALUE) {
  579. /* This is really a C library error (invalid file handle).
  580. We set the Win32 error to the closes one matching. */
  581. SetLastError(ERROR_INVALID_HANDLE);
  582. return -1;
  583. }
  584. memset(result, 0, sizeof(*result));
  585. type = GetFileType(h);
  586. if (type == FILE_TYPE_UNKNOWN) {
  587. DWORD error = GetLastError();
  588. if (error != 0) {
  589. return -1;
  590. }
  591. /* else: valid but unknown file */
  592. }
  593. if (type != FILE_TYPE_DISK) {
  594. if (type == FILE_TYPE_CHAR)
  595. result->st_mode = _S_IFCHR;
  596. else if (type == FILE_TYPE_PIPE)
  597. result->st_mode = _S_IFIFO;
  598. return 0;
  599. }
  600. if (!GetFileInformationByHandle(h, &info)) {
  601. return -1;
  602. }
  603. _Py_attribute_data_to_stat(&info, 0, result);
  604. /* specific to fstat() */
  605. result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow;
  606. return 0;
  607. #else
  608. return fstat(fd, result);
  609. #endif
  610. }
  611. #endif /* HAVE_FSTAT || MS_WINDOWS */
  612. #ifdef HAVE_STAT
  613. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  614. call stat() otherwise. Only fill st_mode attribute on Windows.
  615. Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
  616. raised. */
  617. int
  618. _Py_stat(PyObject *path, struct stat *statbuf)
  619. {
  620. #ifdef MS_WINDOWS
  621. int err;
  622. struct _stat wstatbuf;
  623. wchar_t *wpath;
  624. wpath = PyUnicode_AsUnicode(path);
  625. if (wpath == NULL)
  626. return -2;
  627. err = _wstat(wpath, &wstatbuf);
  628. if (!err)
  629. statbuf->st_mode = wstatbuf.st_mode;
  630. return err;
  631. #else
  632. int ret;
  633. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  634. if (bytes == NULL)
  635. return -2;
  636. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  637. Py_DECREF(bytes);
  638. return ret;
  639. #endif
  640. }
  641. #endif /* HAVE_STAT */
  642. static int
  643. get_inheritable(int fd, int raise)
  644. {
  645. #ifdef MS_WINDOWS
  646. HANDLE handle;
  647. DWORD flags;
  648. if (!_PyVerify_fd(fd)) {
  649. if (raise)
  650. PyErr_SetFromErrno(PyExc_OSError);
  651. return -1;
  652. }
  653. handle = (HANDLE)_get_osfhandle(fd);
  654. if (handle == INVALID_HANDLE_VALUE) {
  655. if (raise)
  656. PyErr_SetFromErrno(PyExc_OSError);
  657. return -1;
  658. }
  659. if (!GetHandleInformation(handle, &flags)) {
  660. if (raise)
  661. PyErr_SetFromWindowsErr(0);
  662. return -1;
  663. }
  664. return (flags & HANDLE_FLAG_INHERIT);
  665. #else
  666. int flags;
  667. flags = fcntl(fd, F_GETFD, 0);
  668. if (flags == -1) {
  669. if (raise)
  670. PyErr_SetFromErrno(PyExc_OSError);
  671. return -1;
  672. }
  673. return !(flags & FD_CLOEXEC);
  674. #endif
  675. }
  676. /* Get the inheritable flag of the specified file descriptor.
  677. Return 1 if the file descriptor can be inherited, 0 if it cannot,
  678. raise an exception and return -1 on error. */
  679. int
  680. _Py_get_inheritable(int fd)
  681. {
  682. return get_inheritable(fd, 1);
  683. }
  684. static int
  685. set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
  686. {
  687. #ifdef MS_WINDOWS
  688. HANDLE handle;
  689. DWORD flags;
  690. #else
  691. #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
  692. static int ioctl_works = -1;
  693. int request;
  694. int err;
  695. #endif
  696. int flags;
  697. int res;
  698. #endif
  699. /* atomic_flag_works can only be used to make the file descriptor
  700. non-inheritable */
  701. assert(!(atomic_flag_works != NULL && inheritable));
  702. if (atomic_flag_works != NULL && !inheritable) {
  703. if (*atomic_flag_works == -1) {
  704. int isInheritable = get_inheritable(fd, raise);
  705. if (isInheritable == -1)
  706. return -1;
  707. *atomic_flag_works = !isInheritable;
  708. }
  709. if (*atomic_flag_works)
  710. return 0;
  711. }
  712. #ifdef MS_WINDOWS
  713. if (!_PyVerify_fd(fd)) {
  714. if (raise)
  715. PyErr_SetFromErrno(PyExc_OSError);
  716. return -1;
  717. }
  718. handle = (HANDLE)_get_osfhandle(fd);
  719. if (handle == INVALID_HANDLE_VALUE) {
  720. if (raise)
  721. PyErr_SetFromErrno(PyExc_OSError);
  722. return -1;
  723. }
  724. if (inheritable)
  725. flags = HANDLE_FLAG_INHERIT;
  726. else
  727. flags = 0;
  728. if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
  729. if (raise)
  730. PyErr_SetFromWindowsErr(0);
  731. return -1;
  732. }
  733. return 0;
  734. #else
  735. #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
  736. if (ioctl_works != 0) {
  737. /* fast-path: ioctl() only requires one syscall */
  738. if (inheritable)
  739. request = FIONCLEX;
  740. else
  741. request = FIOCLEX;
  742. err = ioctl(fd, request, NULL);
  743. if (!err) {
  744. ioctl_works = 1;
  745. return 0;
  746. }
  747. if (errno != ENOTTY) {
  748. if (raise)
  749. PyErr_SetFromErrno(PyExc_OSError);
  750. return -1;
  751. }
  752. else {
  753. /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
  754. device". The ioctl is declared but not supported by the kernel.
  755. Remember that ioctl() doesn't work. It is the case on
  756. Illumos-based OS for example. */
  757. ioctl_works = 0;
  758. }
  759. /* fallback to fcntl() if ioctl() does not work */
  760. }
  761. #endif
  762. /* slow-path: fcntl() requires two syscalls */
  763. flags = fcntl(fd, F_GETFD);
  764. if (flags < 0) {
  765. if (raise)
  766. PyErr_SetFromErrno(PyExc_OSError);
  767. return -1;
  768. }
  769. if (inheritable)
  770. flags &= ~FD_CLOEXEC;
  771. else
  772. flags |= FD_CLOEXEC;
  773. res = fcntl(fd, F_SETFD, flags);
  774. if (res < 0) {
  775. if (raise)
  776. PyErr_SetFromErrno(PyExc_OSError);
  777. return -1;
  778. }
  779. return 0;
  780. #endif
  781. }
  782. /* Make the file descriptor non-inheritable.
  783. Return 0 on success, set errno and return -1 on error. */
  784. static int
  785. make_non_inheritable(int fd)
  786. {
  787. return set_inheritable(fd, 0, 0, NULL);
  788. }
  789. /* Set the inheritable flag of the specified file descriptor.
  790. On success: return 0, on error: raise an exception if raise is nonzero
  791. and return -1.
  792. If atomic_flag_works is not NULL:
  793. * if *atomic_flag_works==-1, check if the inheritable is set on the file
  794. descriptor: if yes, set *atomic_flag_works to 1, otherwise set to 0 and
  795. set the inheritable flag
  796. * if *atomic_flag_works==1: do nothing
  797. * if *atomic_flag_works==0: set inheritable flag to False
  798. Set atomic_flag_works to NULL if no atomic flag was used to create the
  799. file descriptor.
  800. atomic_flag_works can only be used to make a file descriptor
  801. non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
  802. int
  803. _Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
  804. {
  805. return set_inheritable(fd, inheritable, 1, atomic_flag_works);
  806. }
  807. static int
  808. _Py_open_impl(const char *pathname, int flags, int gil_held)
  809. {
  810. int fd;
  811. #ifndef MS_WINDOWS
  812. int *atomic_flag_works;
  813. #endif
  814. #ifdef MS_WINDOWS
  815. flags |= O_NOINHERIT;
  816. #elif defined(O_CLOEXEC)
  817. atomic_flag_works = &_Py_open_cloexec_works;
  818. flags |= O_CLOEXEC;
  819. #else
  820. atomic_flag_works = NULL;
  821. #endif
  822. if (gil_held) {
  823. Py_BEGIN_ALLOW_THREADS
  824. fd = open(pathname, flags);
  825. Py_END_ALLOW_THREADS
  826. if (fd < 0) {
  827. PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
  828. return -1;
  829. }
  830. }
  831. else {
  832. fd = open(pathname, flags);
  833. if (fd < 0)
  834. return -1;
  835. }
  836. #ifndef MS_WINDOWS
  837. if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
  838. close(fd);
  839. return -1;
  840. }
  841. #endif
  842. return fd;
  843. }
  844. /* Open a file with the specified flags (wrapper to open() function).
  845. Return a file descriptor on success. Raise an exception and return -1 on
  846. error.
  847. The file descriptor is created non-inheritable.
  848. The GIL must be held. Use _Py_open_noraise() if the GIL cannot be held. */
  849. int
  850. _Py_open(const char *pathname, int flags)
  851. {
  852. /* _Py_open() must be called with the GIL held. */
  853. assert(PyGILState_Check());
  854. return _Py_open_impl(pathname, flags, 1);
  855. }
  856. /* Open a file with the specified flags (wrapper to open() function).
  857. Return a file descriptor on success. Set errno and return -1 on error.
  858. The file descriptor is created non-inheritable. */
  859. int
  860. _Py_open_noraise(const char *pathname, int flags)
  861. {
  862. return _Py_open_impl(pathname, flags, 0);
  863. }
  864. /* Open a file. Use _wfopen() on Windows, encode the path to the locale
  865. encoding and use fopen() otherwise. The file descriptor is created
  866. non-inheritable. */
  867. FILE *
  868. _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  869. {
  870. FILE *f;
  871. #ifndef MS_WINDOWS
  872. char *cpath;
  873. char cmode[10];
  874. size_t r;
  875. r = wcstombs(cmode, mode, 10);
  876. if (r == (size_t)-1 || r >= 10) {
  877. errno = EINVAL;
  878. return NULL;
  879. }
  880. cpath = Py_EncodeLocale(path, NULL);
  881. if (cpath == NULL)
  882. return NULL;
  883. f = fopen(cpath, cmode);
  884. PyMem_Free(cpath);
  885. #else
  886. f = _wfopen(path, mode);
  887. #endif
  888. if (f == NULL)
  889. return NULL;
  890. if (make_non_inheritable(fileno(f)) < 0) {
  891. fclose(f);
  892. return NULL;
  893. }
  894. return f;
  895. }
  896. /* Wrapper to fopen(). The file descriptor is created non-inheritable. */
  897. FILE*
  898. _Py_fopen(const char *pathname, const char *mode)
  899. {
  900. FILE *f = fopen(pathname, mode);
  901. if (f == NULL)
  902. return NULL;
  903. if (make_non_inheritable(fileno(f)) < 0) {
  904. fclose(f);
  905. return NULL;
  906. }
  907. return f;
  908. }
  909. /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
  910. encoding and call fopen() otherwise. The file descriptor is created
  911. non-inheritable.
  912. Return the new file object on success, or NULL if the file cannot be open or
  913. (if PyErr_Occurred()) on unicode error. */
  914. FILE*
  915. _Py_fopen_obj(PyObject *path, const char *mode)
  916. {
  917. FILE *f;
  918. #ifdef MS_WINDOWS
  919. wchar_t *wpath;
  920. wchar_t wmode[10];
  921. int usize;
  922. if (!PyUnicode_Check(path)) {
  923. PyErr_Format(PyExc_TypeError,
  924. "str file path expected under Windows, got %R",
  925. Py_TYPE(path));
  926. return NULL;
  927. }
  928. wpath = PyUnicode_AsUnicode(path);
  929. if (wpath == NULL)
  930. return NULL;
  931. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  932. if (usize == 0)
  933. return NULL;
  934. f = _wfopen(wpath, wmode);
  935. #else
  936. PyObject *bytes;
  937. if (!PyUnicode_FSConverter(path, &bytes))
  938. return NULL;
  939. f = fopen(PyBytes_AS_STRING(bytes), mode);
  940. Py_DECREF(bytes);
  941. #endif
  942. if (f == NULL)
  943. return NULL;
  944. if (make_non_inheritable(fileno(f)) < 0) {
  945. fclose(f);
  946. return NULL;
  947. }
  948. return f;
  949. }
  950. #ifdef HAVE_READLINK
  951. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  952. the result from the locale encoding. Return -1 on error. */
  953. int
  954. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  955. {
  956. char *cpath;
  957. char cbuf[MAXPATHLEN];
  958. wchar_t *wbuf;
  959. int res;
  960. size_t r1;
  961. cpath = Py_EncodeLocale(path, NULL);
  962. if (cpath == NULL) {
  963. errno = EINVAL;
  964. return -1;
  965. }
  966. res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
  967. PyMem_Free(cpath);
  968. if (res == -1)
  969. return -1;
  970. if (res == Py_ARRAY_LENGTH(cbuf)) {
  971. errno = EINVAL;
  972. return -1;
  973. }
  974. cbuf[res] = '\0'; /* buf will be null terminated */
  975. wbuf = Py_DecodeLocale(cbuf, &r1);
  976. if (wbuf == NULL) {
  977. errno = EINVAL;
  978. return -1;
  979. }
  980. if (bufsiz <= r1) {
  981. PyMem_RawFree(wbuf);
  982. errno = EINVAL;
  983. return -1;
  984. }
  985. wcsncpy(buf, wbuf, bufsiz);
  986. PyMem_RawFree(wbuf);
  987. return (int)r1;
  988. }
  989. #endif
  990. #ifdef HAVE_REALPATH
  991. /* Return the canonicalized absolute pathname. Encode path to the locale
  992. encoding, decode the result from the locale encoding.
  993. Return NULL on error. */
  994. wchar_t*
  995. _Py_wrealpath(const wchar_t *path,
  996. wchar_t *resolved_path, size_t resolved_path_size)
  997. {
  998. char *cpath;
  999. char cresolved_path[MAXPATHLEN];
  1000. wchar_t *wresolved_path;
  1001. char *res;
  1002. size_t r;
  1003. cpath = Py_EncodeLocale(path, NULL);
  1004. if (cpath == NULL) {
  1005. errno = EINVAL;
  1006. return NULL;
  1007. }
  1008. res = realpath(cpath, cresolved_path);
  1009. PyMem_Free(cpath);
  1010. if (res == NULL)
  1011. return NULL;
  1012. wresolved_path = Py_DecodeLocale(cresolved_path, &r);
  1013. if (wresolved_path == NULL) {
  1014. errno = EINVAL;
  1015. return NULL;
  1016. }
  1017. if (resolved_path_size <= r) {
  1018. PyMem_RawFree(wresolved_path);
  1019. errno = EINVAL;
  1020. return NULL;
  1021. }
  1022. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  1023. PyMem_RawFree(wresolved_path);
  1024. return resolved_path;
  1025. }
  1026. #endif
  1027. /* Get the current directory. size is the buffer size in wide characters
  1028. including the null character. Decode the path from the locale encoding.
  1029. Return NULL on error. */
  1030. wchar_t*
  1031. _Py_wgetcwd(wchar_t *buf, size_t size)
  1032. {
  1033. #ifdef MS_WINDOWS
  1034. int isize = (int)Py_MIN(size, INT_MAX);
  1035. return _wgetcwd(buf, isize);
  1036. #else
  1037. char fname[MAXPATHLEN];
  1038. wchar_t *wname;
  1039. size_t len;
  1040. if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
  1041. return NULL;
  1042. wname = Py_DecodeLocale(fname, &len);
  1043. if (wname == NULL)
  1044. return NULL;
  1045. if (size <= len) {
  1046. PyMem_RawFree(wname);
  1047. return NULL;
  1048. }
  1049. wcsncpy(buf, wname, size);
  1050. PyMem_RawFree(wname);
  1051. return buf;
  1052. #endif
  1053. }
  1054. /* Duplicate a file descriptor. The new file descriptor is created as
  1055. non-inheritable. Return a new file descriptor on success, raise an OSError
  1056. exception and return -1 on error.
  1057. The GIL is released to call dup(). The caller must hold the GIL. */
  1058. int
  1059. _Py_dup(int fd)
  1060. {
  1061. #ifdef MS_WINDOWS
  1062. HANDLE handle;
  1063. DWORD ftype;
  1064. #endif
  1065. if (!_PyVerify_fd(fd)) {
  1066. PyErr_SetFromErrno(PyExc_OSError);
  1067. return -1;
  1068. }
  1069. #ifdef MS_WINDOWS
  1070. handle = (HANDLE)_get_osfhandle(fd);
  1071. if (handle == INVALID_HANDLE_VALUE) {
  1072. PyErr_SetFromErrno(PyExc_OSError);
  1073. return -1;
  1074. }
  1075. /* get the file type, ignore the error if it failed */
  1076. ftype = GetFileType(handle);
  1077. Py_BEGIN_ALLOW_THREADS
  1078. fd = dup(fd);
  1079. Py_END_ALLOW_THREADS
  1080. if (fd < 0) {
  1081. PyErr_SetFromErrno(PyExc_OSError);
  1082. return -1;
  1083. }
  1084. /* Character files like console cannot be make non-inheritable */
  1085. if (ftype != FILE_TYPE_CHAR) {
  1086. if (_Py_set_inheritable(fd, 0, NULL) < 0) {
  1087. close(fd);
  1088. return -1;
  1089. }
  1090. }
  1091. #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
  1092. Py_BEGIN_ALLOW_THREADS
  1093. fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
  1094. Py_END_ALLOW_THREADS
  1095. if (fd < 0) {
  1096. PyErr_SetFromErrno(PyExc_OSError);
  1097. return -1;
  1098. }
  1099. #else
  1100. Py_BEGIN_ALLOW_THREADS
  1101. fd = dup(fd);
  1102. Py_END_ALLOW_THREADS
  1103. if (fd < 0) {
  1104. PyErr_SetFromErrno(PyExc_OSError);
  1105. return -1;
  1106. }
  1107. if (_Py_set_inheritable(fd, 0, NULL) < 0) {
  1108. close(fd);
  1109. return -1;
  1110. }
  1111. #endif
  1112. return fd;
  1113. }
  1114. #ifndef MS_WINDOWS
  1115. /* Get the blocking mode of the file descriptor.
  1116. Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
  1117. raise an exception and return -1 on error. */
  1118. int
  1119. _Py_get_blocking(int fd)
  1120. {
  1121. int flags = fcntl(fd, F_GETFL, 0);
  1122. if (flags < 0) {
  1123. PyErr_SetFromErrno(PyExc_OSError);
  1124. return -1;
  1125. }
  1126. return !(flags & O_NONBLOCK);
  1127. }
  1128. /* Set the blocking mode of the specified file descriptor.
  1129. Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
  1130. otherwise.
  1131. Return 0 on success, raise an exception and return -1 on error. */
  1132. int
  1133. _Py_set_blocking(int fd, int blocking)
  1134. {
  1135. #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
  1136. int arg = !blocking;
  1137. if (ioctl(fd, FIONBIO, &arg) < 0)
  1138. goto error;
  1139. #else
  1140. int flags, res;
  1141. flags = fcntl(fd, F_GETFL, 0);
  1142. if (flags < 0)
  1143. goto error;
  1144. if (blocking)
  1145. flags = flags & (~O_NONBLOCK);
  1146. else
  1147. flags = flags | O_NONBLOCK;
  1148. res = fcntl(fd, F_SETFL, flags);
  1149. if (res < 0)
  1150. goto error;
  1151. #endif
  1152. return 0;
  1153. error:
  1154. PyErr_SetFromErrno(PyExc_OSError);
  1155. return -1;
  1156. }
  1157. #endif
  1158. #ifdef _MSC_VER
  1159. #if _MSC_VER >= 1900
  1160. /* This function lets the Windows CRT validate the file handle without
  1161. terminating the process if it's invalid. */
  1162. int
  1163. _PyVerify_fd(int fd)
  1164. {
  1165. intptr_t osh;
  1166. /* Fast check for the only condition we know */
  1167. if (fd < 0) {
  1168. _set_errno(EBADF);
  1169. return 0;
  1170. }
  1171. osh = _get_osfhandle(fd);
  1172. return osh != (intptr_t)-1;
  1173. }
  1174. #elif _MSC_VER >= 1400
  1175. /* Legacy implementation of _PyVerify_fd while transitioning to
  1176. * MSVC 14.0. This should eventually be removed. (issue23524)
  1177. */
  1178. /* Microsoft CRT in VS2005 and higher will verify that a filehandle is
  1179. * valid and raise an assertion if it isn't.
  1180. * Normally, an invalid fd is likely to be a C program error and therefore
  1181. * an assertion can be useful, but it does contradict the POSIX standard
  1182. * which for write(2) states:
  1183. * "Otherwise, -1 shall be returned and errno set to indicate the error."
  1184. * "[EBADF] The fildes argument is not a valid file descriptor open for
  1185. * writing."
  1186. * Furthermore, python allows the user to enter any old integer
  1187. * as a fd and should merely raise a python exception on error.
  1188. * The Microsoft CRT doesn't provide an official way to check for the
  1189. * validity of a file descriptor, but we can emulate its internal behaviour
  1190. * by using the exported __pinfo data member and knowledge of the
  1191. * internal structures involved.
  1192. * The structures below must be updated for each version of visual studio
  1193. * according to the file internal.h in the CRT source, until MS comes
  1194. * up with a less hacky way to do this.
  1195. * (all of this is to avoid globally modifying the CRT behaviour using
  1196. * _set_invalid_parameter_handler() and _CrtSetReportMode())
  1197. */
  1198. /* The actual size of the structure is determined at runtime.
  1199. * Only the first items must be present.
  1200. */
  1201. typedef struct {
  1202. intptr_t osfhnd;
  1203. char osfile;
  1204. } my_ioinfo;
  1205. extern __declspec(dllimport) char * __pioinfo[];
  1206. #define IOINFO_L2E 5
  1207. #define IOINFO_ARRAYS 64
  1208. #define IOINFO_ARRAY_ELTS (1 << IOINFO_L2E)
  1209. #define _NHANDLE_ (IOINFO_ARRAYS * IOINFO_ARRAY_ELTS)
  1210. #define FOPEN 0x01
  1211. #define _NO_CONSOLE_FILENO (intptr_t)-2
  1212. /* This function emulates what the windows CRT does to validate file handles */
  1213. int
  1214. _PyVerify_fd(int fd)
  1215. {
  1216. const int i1 = fd >> IOINFO_L2E;
  1217. const int i2 = fd & ((1 << IOINFO_L2E) - 1);
  1218. static size_t sizeof_ioinfo = 0;
  1219. /* Determine the actual size of the ioinfo structure,
  1220. * as used by the CRT loaded in memory
  1221. */
  1222. if (sizeof_ioinfo == 0 && __pioinfo[0] != NULL) {
  1223. sizeof_ioinfo = _msize(__pioinfo[0]) / IOINFO_ARRAY_ELTS;
  1224. }
  1225. if (sizeof_ioinfo == 0) {
  1226. /* This should not happen... */
  1227. goto fail;
  1228. }
  1229. /* See that it isn't a special CLEAR fileno */
  1230. if (fd != _NO_CONSOLE_FILENO) {
  1231. /* Microsoft CRT would check that 0<=fd<_nhandle but we can't do that. Instead
  1232. * we check pointer validity and other info
  1233. */
  1234. if (0 <= i1 && i1 < IOINFO_ARRAYS && __pioinfo[i1] != NULL) {
  1235. /* finally, check that the file is open */
  1236. my_ioinfo* info = (my_ioinfo*)(__pioinfo[i1] + i2 * sizeof_ioinfo);
  1237. if (info->osfile & FOPEN) {
  1238. return 1;
  1239. }
  1240. }
  1241. }
  1242. fail:
  1243. errno = EBADF;
  1244. return 0;
  1245. }
  1246. #endif /* _MSC_VER >= 1900 || _MSC_VER >= 1400 */
  1247. #endif /* defined _MSC_VER */