You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1046 lines
27 KiB

13 years ago
  1. #include "Python.h"
  2. #include "osdefs.h"
  3. #ifdef MS_WINDOWS
  4. # include <windows.h>
  5. #endif
  6. #ifdef HAVE_LANGINFO_H
  7. #include <locale.h>
  8. #include <langinfo.h>
  9. #endif
  10. #ifdef HAVE_SYS_IOCTL_H
  11. #include <sys/ioctl.h>
  12. #endif
  13. #ifdef HAVE_FCNTL_H
  14. #include <fcntl.h>
  15. #endif /* HAVE_FCNTL_H */
  16. #ifdef __APPLE__
  17. extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
  18. #endif
  19. #ifdef O_CLOEXEC
  /* Tri-state record of whether open() honours the O_CLOEXEC flag:
       -1: not known yet
        0: open() ignores the flag (for example Linux kernels older than
           2.6.23)
        1: open() supports the flag and close-on-exec is set
     Consulted by _Py_open(), io.FileIO and os.open(). */
  int _Py_open_cloexec_works = -1;
  26. #endif
  27. PyObject *
  28. _Py_device_encoding(int fd)
  29. {
  30. #if defined(MS_WINDOWS)
  31. UINT cp;
  32. #endif
  33. if (!_PyVerify_fd(fd) || !isatty(fd)) {
  34. Py_RETURN_NONE;
  35. }
  36. #if defined(MS_WINDOWS)
  37. if (fd == 0)
  38. cp = GetConsoleCP();
  39. else if (fd == 1 || fd == 2)
  40. cp = GetConsoleOutputCP();
  41. else
  42. cp = 0;
  43. /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
  44. has no console */
  45. if (cp != 0)
  46. return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
  47. #elif defined(CODESET)
  48. {
  49. char *codeset = nl_langinfo(CODESET);
  50. if (codeset != NULL && codeset[0] != 0)
  51. return PyUnicode_FromString(codeset);
  52. }
  53. #endif
  54. Py_RETURN_NONE;
  55. }
  56. #if !defined(__APPLE__) && !defined(MS_WINDOWS)
  57. extern int _Py_normalize_encoding(const char *, char *, size_t);
  /* FreeBSD / OpenIndiana "C" locale workaround.

     With the C locale on these systems nl_langinfo(CODESET) announces an alias
     of ASCII while mbstowcs() and wcstombs() actually behave like ISO-8859-1.
     Because os.fsencode() and os.fsdecode() rely on the
     locale.getpreferredencoding() codec, a command line argument decoded by
     mbstowcs() and then encoded back by os.fsencode() raises
     UnicodeEncodeError instead of giving back the original bytes.

     The workaround is enabled when setlocale(LC_CTYPE, NULL) returns "C",
     nl_langinfo(CODESET) announces "ascii" (or one of its aliases) and at
     least one byte in the range 0x80-0xff can be decoded from the locale
     encoding. It is also enabled on error, for example when the locale cannot
     be retrieved.

     force_ascii values:
        1: workaround active: _Py_wchar2char() uses
           encode_ascii_surrogateescape() and _Py_char2wchar() uses
           decode_ascii_surrogateescape()
        0: workaround inactive: _Py_wchar2char() uses wcstombs() and
           _Py_char2wchar() uses mbstowcs()
       -1: not computed yet; call check_force_ascii() to get the value
  */
  static int force_ascii = -1;
  /* Decide whether the ASCII workaround has to be used for the current
     LC_CTYPE locale.

     Return 1 when the ASCII codec must be forced, 0 otherwise. 1 is also
     returned when the locale or its codeset cannot be determined, and when
     nl_langinfo(CODESET) is not available at all. */
  static int
  check_force_ascii(void)
  {
      const char *loc;
  #if defined(HAVE_LANGINFO_H) && defined(CODESET)
      /* Normalized names under which the C library may announce ASCII. */
      static const char *const ascii_aliases[] = {
          "ascii",
          "646",
          "ansi-x3.4-1968",
          "ansi-x3-4-1968",
          "ansi-x3.4-1986",
          "cp367",
          "csascii",
          "ibm367",
          "iso646-us",
          "iso-646.irv-1991",
          "iso-ir-6",
          "us",
          "us-ascii",
          NULL
      };
      const char *codeset;
      const char *const *alias;
      char encoding[100];
      int is_ascii;
      unsigned int i;
  #endif

      loc = setlocale(LC_CTYPE, NULL);
      if (loc == NULL)
          goto error;
      if (strcmp(loc, "C") != 0) {
          /* the LC_CTYPE locale is different than C */
          return 0;
      }

  #if defined(HAVE_LANGINFO_H) && defined(CODESET)
      codeset = nl_langinfo(CODESET);
      if (!codeset || codeset[0] == '\0') {
          /* CODESET is not set or empty */
          goto error;
      }
      if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
          goto error;

      is_ascii = 0;
      for (alias = ascii_aliases; *alias != NULL; alias++) {
          if (strcmp(encoding, *alias) == 0) {
              is_ascii = 1;
              break;
          }
      }
      if (!is_ascii) {
          /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
          return 0;
      }

      /* Probe every non-ASCII byte, 0xff included. */
      for (i = 0x80; i <= 0xff; i++) {
          char probe[2];
          wchar_t wch;
          size_t res;

          /* NUL-terminate the probe so that mbstowcs() can never look past
             the byte being tested. */
          probe[0] = (char)(unsigned char)i;
          probe[1] = '\0';

          res = mbstowcs(&wch, probe, 1);
          if (res != (size_t)-1) {
              /* decoding a non-ASCII byte from the locale encoding
                 succeeded: the locale encoding is not really ASCII, force
                 ASCII */
              return 1;
          }
      }
      /* None of the bytes in the range 0x80-0xff can be decoded from the
         locale encoding: the locale encoding is really ASCII */
      return 0;
  #else
      /* nl_langinfo(CODESET) is not available: always force ASCII */
      return 1;
  #endif

  error:
      /* if an error occurred, force the ASCII encoding */
      return 1;
  }
  /* Encode a wide character string to ASCII with the surrogateescape error
     handler: characters up to 0x7f are copied as is and characters in
     U+DC80..U+DCFF become the bytes 0x80..0xff.

     Return a NUL-terminated buffer allocated with PyMem_Malloc(), or NULL on
     allocation failure or when a character cannot be encoded. If error_pos is
     not NULL, *error_pos is first set to (size_t)-1 and, when a character
     cannot be encoded, to the index of that character. */
  static char*
  encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
  {
      size_t nchars, pos;
      char *encoded;

      if (error_pos != NULL)
          *error_pos = (size_t)-1;

      nchars = wcslen(text);
      encoded = PyMem_Malloc(nchars + 1);  /* +1 for the NUL byte */
      if (encoded == NULL)
          return NULL;

      /* Each input character yields exactly one output byte, so the input
         index doubles as the output index. */
      for (pos = 0; pos < nchars; pos++) {
          const wchar_t wc = text[pos];

          if (wc <= 0x7f) {
              /* ASCII character */
              encoded[pos] = (char)wc;
              continue;
          }
          if (0xdc80 <= wc && wc <= 0xdcff) {
              /* UTF-8b surrogate */
              encoded[pos] = (char)(wc - 0xdc00);
              continue;
          }

          /* unencodable character */
          if (error_pos != NULL)
              *error_pos = pos;
          PyMem_Free(encoded);
          return NULL;
      }
      encoded[nchars] = '\0';
      return encoded;
  }
  187. #endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
  188. #if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
  /* Decode a byte string as ASCII with the surrogateescape error handler:
     bytes below 128 are copied as is, every other byte b becomes the wide
     character 0xdc00 + b.

     Return a NUL-terminated buffer allocated with PyMem_RawMalloc(), or NULL
     on allocation failure. If size is not NULL, the number of wide characters
     written (excluding the terminator) is stored in *size on success. */
  static wchar_t*
  decode_ascii_surrogateescape(const char *arg, size_t *size)
  {
      const unsigned char *bytes = (const unsigned char *)arg;
      const size_t nbytes = strlen(arg);
      wchar_t *wide;
      size_t k;

      wide = PyMem_RawMalloc((nbytes + 1) * sizeof(wchar_t));
      if (wide == NULL)
          return NULL;

      /* One wide character per input byte. */
      for (k = 0; k < nbytes; k++) {
          wide[k] = (bytes[k] < 128) ? bytes[k] : 0xdc00 + bytes[k];
      }
      wide[nbytes] = 0;

      if (size != NULL)
          *size = nbytes;
      return wide;
  }
  210. #endif
  211. /* Decode a byte string from the locale encoding with the
  212. surrogateescape error handler (undecodable bytes are decoded as characters
  213. in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
  214. character, escape the bytes using the surrogateescape error handler instead
  215. of decoding them.
  216. Use _Py_wchar2char() to encode the character string back to a byte string.
  217. Return a pointer to a newly allocated wide character string (use
  218. PyMem_RawFree() to free the memory) and write the number of written wide
  219. characters excluding the null character into *size if size is not NULL, or
  220. NULL on error (decoding or memory allocation error). If size is not NULL,
  221. *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
  222. error.
  223. Conversion errors should never happen, unless there is a bug in the C
  224. library. */
  225. wchar_t*
  226. _Py_char2wchar(const char* arg, size_t *size)
  227. {
  228. #ifdef __APPLE__
  229. wchar_t *wstr;
  230. wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
  231. if (size != NULL) {
  232. if (wstr != NULL)
  233. *size = wcslen(wstr);
  234. else
  235. *size = (size_t)-1;
  236. }
  237. return wstr;
  238. #else
  239. wchar_t *res;
  240. size_t argsize;
  241. size_t count;
  242. #ifdef HAVE_MBRTOWC
  243. unsigned char *in;
  244. wchar_t *out;
  245. mbstate_t mbs;
  246. #endif
  247. #ifndef MS_WINDOWS
  248. if (force_ascii == -1)
  249. force_ascii = check_force_ascii();
  250. if (force_ascii) {
  251. /* force ASCII encoding to workaround mbstowcs() issue */
  252. res = decode_ascii_surrogateescape(arg, size);
  253. if (res == NULL)
  254. goto oom;
  255. return res;
  256. }
  257. #endif
  258. #ifdef HAVE_BROKEN_MBSTOWCS
  259. /* Some platforms have a broken implementation of
  260. * mbstowcs which does not count the characters that
  261. * would result from conversion. Use an upper bound.
  262. */
  263. argsize = strlen(arg);
  264. #else
  265. argsize = mbstowcs(NULL, arg, 0);
  266. #endif
  267. if (argsize != (size_t)-1) {
  268. res = (wchar_t *)PyMem_RawMalloc((argsize+1)*sizeof(wchar_t));
  269. if (!res)
  270. goto oom;
  271. count = mbstowcs(res, arg, argsize+1);
  272. if (count != (size_t)-1) {
  273. wchar_t *tmp;
  274. /* Only use the result if it contains no
  275. surrogate characters. */
  276. for (tmp = res; *tmp != 0 &&
  277. !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
  278. ;
  279. if (*tmp == 0) {
  280. if (size != NULL)
  281. *size = count;
  282. return res;
  283. }
  284. }
  285. PyMem_RawFree(res);
  286. }
  287. /* Conversion failed. Fall back to escaping with surrogateescape. */
  288. #ifdef HAVE_MBRTOWC
  289. /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
  290. /* Overallocate; as multi-byte characters are in the argument, the
  291. actual output could use less memory. */
  292. argsize = strlen(arg) + 1;
  293. res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
  294. if (!res)
  295. goto oom;
  296. in = (unsigned char*)arg;
  297. out = res;
  298. memset(&mbs, 0, sizeof mbs);
  299. while (argsize) {
  300. size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
  301. if (converted == 0)
  302. /* Reached end of string; null char stored. */
  303. break;
  304. if (converted == (size_t)-2) {
  305. /* Incomplete character. This should never happen,
  306. since we provide everything that we have -
  307. unless there is a bug in the C library, or I
  308. misunderstood how mbrtowc works. */
  309. PyMem_RawFree(res);
  310. if (size != NULL)
  311. *size = (size_t)-2;
  312. return NULL;
  313. }
  314. if (converted == (size_t)-1) {
  315. /* Conversion error. Escape as UTF-8b, and start over
  316. in the initial shift state. */
  317. *out++ = 0xdc00 + *in++;
  318. argsize--;
  319. memset(&mbs, 0, sizeof mbs);
  320. continue;
  321. }
  322. if (Py_UNICODE_IS_SURROGATE(*out)) {
  323. /* Surrogate character. Escape the original
  324. byte sequence with surrogateescape. */
  325. argsize -= converted;
  326. while (converted--)
  327. *out++ = 0xdc00 + *in++;
  328. continue;
  329. }
  330. /* successfully converted some bytes */
  331. in += converted;
  332. argsize -= converted;
  333. out++;
  334. }
  335. if (size != NULL)
  336. *size = out - res;
  337. #else /* HAVE_MBRTOWC */
  338. /* Cannot use C locale for escaping; manually escape as if charset
  339. is ASCII (i.e. escape all bytes > 128. This will still roundtrip
  340. correctly in the locale's charset, which must be an ASCII superset. */
  341. res = decode_ascii_surrogateescape(arg, size);
  342. if (res == NULL)
  343. goto oom;
  344. #endif /* HAVE_MBRTOWC */
  345. return res;
  346. oom:
  347. if (size != NULL)
  348. *size = (size_t)-1;
  349. return NULL;
  350. #endif /* __APPLE__ */
  351. }
  352. /* Encode a (wide) character string to the locale encoding with the
  353. surrogateescape error handler (characters in range U+DC80..U+DCFF are
  354. converted to bytes 0x80..0xFF).
  355. This function is the reverse of _Py_char2wchar().
  356. Return a pointer to a newly allocated byte string (use PyMem_Free() to free
  357. the memory), or NULL on encoding or memory allocation error.
  358. If error_pos is not NULL: *error_pos is the index of the invalid character
  359. on encoding error, or (size_t)-1 otherwise. */
  360. char*
  361. _Py_wchar2char(const wchar_t *text, size_t *error_pos)
  362. {
  363. #ifdef __APPLE__
  364. Py_ssize_t len;
  365. PyObject *unicode, *bytes = NULL;
  366. char *cpath;
  367. unicode = PyUnicode_FromWideChar(text, wcslen(text));
  368. if (unicode == NULL)
  369. return NULL;
  370. bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
  371. Py_DECREF(unicode);
  372. if (bytes == NULL) {
  373. PyErr_Clear();
  374. if (error_pos != NULL)
  375. *error_pos = (size_t)-1;
  376. return NULL;
  377. }
  378. len = PyBytes_GET_SIZE(bytes);
  379. cpath = PyMem_Malloc(len+1);
  380. if (cpath == NULL) {
  381. PyErr_Clear();
  382. Py_DECREF(bytes);
  383. if (error_pos != NULL)
  384. *error_pos = (size_t)-1;
  385. return NULL;
  386. }
  387. memcpy(cpath, PyBytes_AsString(bytes), len + 1);
  388. Py_DECREF(bytes);
  389. return cpath;
  390. #else /* __APPLE__ */
  391. const size_t len = wcslen(text);
  392. char *result = NULL, *bytes = NULL;
  393. size_t i, size, converted;
  394. wchar_t c, buf[2];
  395. #ifndef MS_WINDOWS
  396. if (force_ascii == -1)
  397. force_ascii = check_force_ascii();
  398. if (force_ascii)
  399. return encode_ascii_surrogateescape(text, error_pos);
  400. #endif
  401. /* The function works in two steps:
  402. 1. compute the length of the output buffer in bytes (size)
  403. 2. outputs the bytes */
  404. size = 0;
  405. buf[1] = 0;
  406. while (1) {
  407. for (i=0; i < len; i++) {
  408. c = text[i];
  409. if (c >= 0xdc80 && c <= 0xdcff) {
  410. /* UTF-8b surrogate */
  411. if (bytes != NULL) {
  412. *bytes++ = c - 0xdc00;
  413. size--;
  414. }
  415. else
  416. size++;
  417. continue;
  418. }
  419. else {
  420. buf[0] = c;
  421. if (bytes != NULL)
  422. converted = wcstombs(bytes, buf, size);
  423. else
  424. converted = wcstombs(NULL, buf, 0);
  425. if (converted == (size_t)-1) {
  426. if (result != NULL)
  427. PyMem_Free(result);
  428. if (error_pos != NULL)
  429. *error_pos = i;
  430. return NULL;
  431. }
  432. if (bytes != NULL) {
  433. bytes += converted;
  434. size -= converted;
  435. }
  436. else
  437. size += converted;
  438. }
  439. }
  440. if (result != NULL) {
  441. *bytes = '\0';
  442. break;
  443. }
  444. size += 1; /* nul byte at the end */
  445. result = PyMem_Malloc(size);
  446. if (result == NULL) {
  447. if (error_pos != NULL)
  448. *error_pos = (size_t)-1;
  449. return NULL;
  450. }
  451. bytes = result;
  452. }
  453. return result;
  454. #endif /* __APPLE__ */
  455. }
  456. /* In principle, this should use HAVE__WSTAT, and _wstat
  457. should be detected by autoconf. However, no current
  458. POSIX system provides that function, so testing for
  459. it is pointless.
  460. Not sure whether the MS_WINDOWS guards are necessary:
  461. perhaps for cygwin/mingw builds?
  462. */
  463. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  /* stat() for a wide character path: the path is encoded to the locale
     encoding with _Py_wchar2char() first. Return the result of stat(), or -1
     with errno set to EINVAL when the path cannot be encoded. */
  int
  _Py_wstat(const wchar_t* path, struct stat *buf)
  {
      int status;
      char *encoded = _Py_wchar2char(path, NULL);

      if (encoded == NULL) {
          errno = EINVAL;
          return -1;
      }

      status = stat(encoded, buf);
      PyMem_Free(encoded);
      return status;
  }
  479. #endif
  480. #ifdef HAVE_STAT
  481. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  482. call stat() otherwise. Only fill st_mode attribute on Windows.
  483. Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
  484. raised. */
  485. int
  486. _Py_stat(PyObject *path, struct stat *statbuf)
  487. {
  488. #ifdef MS_WINDOWS
  489. int err;
  490. struct _stat wstatbuf;
  491. wchar_t *wpath;
  492. wpath = PyUnicode_AsUnicode(path);
  493. if (wpath == NULL)
  494. return -2;
  495. err = _wstat(wpath, &wstatbuf);
  496. if (!err)
  497. statbuf->st_mode = wstatbuf.st_mode;
  498. return err;
  499. #else
  500. int ret;
  501. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  502. if (bytes == NULL)
  503. return -2;
  504. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  505. Py_DECREF(bytes);
  506. return ret;
  507. #endif
  508. }
  509. #endif
  510. static int
  511. get_inheritable(int fd, int raise)
  512. {
  513. #ifdef MS_WINDOWS
  514. HANDLE handle;
  515. DWORD flags;
  516. if (!_PyVerify_fd(fd)) {
  517. if (raise)
  518. PyErr_SetFromErrno(PyExc_OSError);
  519. return -1;
  520. }
  521. handle = (HANDLE)_get_osfhandle(fd);
  522. if (handle == INVALID_HANDLE_VALUE) {
  523. if (raise)
  524. PyErr_SetFromWindowsErr(0);
  525. return -1;
  526. }
  527. if (!GetHandleInformation(handle, &flags)) {
  528. if (raise)
  529. PyErr_SetFromWindowsErr(0);
  530. return -1;
  531. }
  532. return (flags & HANDLE_FLAG_INHERIT);
  533. #else
  534. int flags;
  535. flags = fcntl(fd, F_GETFD, 0);
  536. if (flags == -1) {
  537. if (raise)
  538. PyErr_SetFromErrno(PyExc_OSError);
  539. return -1;
  540. }
  541. return !(flags & FD_CLOEXEC);
  542. #endif
  543. }
  /* Public entry point for reading the inheritable flag of a file
     descriptor.

     Return 1 if fd can be inherited, 0 if it cannot; on error raise an
     exception and return -1. */
  int
  _Py_get_inheritable(int fd)
  {
      const int raise_on_error = 1;

      return get_inheritable(fd, raise_on_error);
  }
  552. static int
  553. set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
  554. {
  555. #ifdef MS_WINDOWS
  556. HANDLE handle;
  557. DWORD flags;
  558. #elif defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
  559. int request;
  560. int err;
  561. #elif defined(HAVE_FCNTL_H)
  562. int flags;
  563. int res;
  564. #endif
  565. /* atomic_flag_works can only be used to make the file descriptor
  566. non-inheritable */
  567. assert(!(atomic_flag_works != NULL && inheritable));
  568. if (atomic_flag_works != NULL && !inheritable) {
  569. if (*atomic_flag_works == -1) {
  570. int inheritable = get_inheritable(fd, raise);
  571. if (inheritable == -1)
  572. return -1;
  573. *atomic_flag_works = !inheritable;
  574. }
  575. if (*atomic_flag_works)
  576. return 0;
  577. }
  578. #ifdef MS_WINDOWS
  579. if (!_PyVerify_fd(fd)) {
  580. if (raise)
  581. PyErr_SetFromErrno(PyExc_OSError);
  582. return -1;
  583. }
  584. handle = (HANDLE)_get_osfhandle(fd);
  585. if (handle == INVALID_HANDLE_VALUE) {
  586. if (raise)
  587. PyErr_SetFromWindowsErr(0);
  588. return -1;
  589. }
  590. if (inheritable)
  591. flags = HANDLE_FLAG_INHERIT;
  592. else
  593. flags = 0;
  594. if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
  595. if (raise)
  596. PyErr_SetFromWindowsErr(0);
  597. return -1;
  598. }
  599. return 0;
  600. #elif defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
  601. if (inheritable)
  602. request = FIONCLEX;
  603. else
  604. request = FIOCLEX;
  605. err = ioctl(fd, request);
  606. if (err) {
  607. if (raise)
  608. PyErr_SetFromErrno(PyExc_OSError);
  609. return -1;
  610. }
  611. return 0;
  612. #else
  613. flags = fcntl(fd, F_GETFD);
  614. if (flags < 0) {
  615. if (raise)
  616. PyErr_SetFromErrno(PyExc_OSError);
  617. return -1;
  618. }
  619. if (inheritable)
  620. flags &= ~FD_CLOEXEC;
  621. else
  622. flags |= FD_CLOEXEC;
  623. res = fcntl(fd, F_SETFD, flags);
  624. if (res < 0) {
  625. if (raise)
  626. PyErr_SetFromErrno(PyExc_OSError);
  627. return -1;
  628. }
  629. return 0;
  630. #endif
  631. }
  /* Clear the inheritable flag of fd without raising a Python exception.
     Return 0 on success; on error, errno is set and -1 is returned. */
  static int
  make_non_inheritable(int fd)
  {
      const int inheritable = 0;
      const int raise = 0;

      return set_inheritable(fd, inheritable, raise, NULL);
  }
  /* Set the inheritable flag of the specified file descriptor.

     Return 0 on success; on error, raise an exception and return -1.

     atomic_flag_works is either NULL (no atomic flag was used to create the
     file descriptor) or a pointer to a cached tri-state value:
      * -1: check whether the descriptor is inheritable and store the
            outcome in *atomic_flag_works; the inheritable flag is changed
            only if the descriptor turned out to be inheritable
      *  1: do nothing
      *  0: set the inheritable flag to False

     atomic_flag_works can only be used to make a file descriptor
     non-inheritable: it must be NULL if inheritable=1. */
  int
  _Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
  {
      const int raise_on_error = 1;

      return set_inheritable(fd, inheritable, raise_on_error,
                             atomic_flag_works);
  }
  /* open() wrapper that creates the file descriptor non-inheritable.

     Return the new file descriptor, or a negative value on error. No Python
     exception is raised. */
  int
  _Py_open(const char *pathname, int flags)
  {
  #ifdef MS_WINDOWS
      /* O_NOINHERIT does the whole job on Windows. */
      return open(pathname, flags | O_NOINHERIT);
  #else
      int fd;
      int *cloexec_cache = NULL;

  #ifdef O_CLOEXEC
      /* Ask for close-on-exec atomically; the cache remembers whether the
         kernel really honours the flag. */
      cloexec_cache = &_Py_open_cloexec_works;
      flags |= O_CLOEXEC;
  #endif

      fd = open(pathname, flags);
      if (fd < 0)
          return fd;

      if (set_inheritable(fd, 0, 0, cloexec_cache) < 0) {
          close(fd);
          return -1;
      }
      return fd;
  #endif /* !MS_WINDOWS */
  }
  /* Open a file given a wide character path and mode.

     Windows: call _wfopen() directly.
     Elsewhere: encode the mode with wcstombs() and the path with
     _Py_wchar2char(), then call fopen().

     The underlying file descriptor is made non-inheritable. Return NULL on
     error; errno is set to EINVAL when the mode cannot be converted or does
     not fit in the 10-byte mode buffer. */
  FILE *
  _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  {
      FILE *stream;
  #ifndef MS_WINDOWS
      char narrow_mode[10];
      char *narrow_path;
      size_t mode_len = wcstombs(narrow_mode, mode, sizeof(narrow_mode));

      if (mode_len == (size_t)-1 || mode_len >= sizeof(narrow_mode)) {
          errno = EINVAL;
          return NULL;
      }

      narrow_path = _Py_wchar2char(path, NULL);
      if (narrow_path == NULL)
          return NULL;
      stream = fopen(narrow_path, narrow_mode);
      PyMem_Free(narrow_path);
  #else
      stream = _wfopen(path, mode);
  #endif

      if (stream != NULL && make_non_inheritable(fileno(stream)) < 0) {
          fclose(stream);
          stream = NULL;
      }
      return stream;
  }
  /* fopen() wrapper whose underlying file descriptor is made
     non-inheritable. Return NULL if the file cannot be opened or if the
     descriptor cannot be made non-inheritable (the stream is closed in that
     case). */
  FILE*
  _Py_fopen(const char *pathname, const char *mode)
  {
      FILE *stream = fopen(pathname, mode);

      if (stream != NULL && make_non_inheritable(fileno(stream)) < 0) {
          fclose(stream);
          stream = NULL;
      }
      return stream;
  }
  730. /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
  731. encoding and call fopen() otherwise. The file descriptor is created
  732. non-inheritable.
  733. Return the new file object on success, or NULL if the file cannot be open or
  734. (if PyErr_Occurred()) on unicode error. */
  735. FILE*
  736. _Py_fopen_obj(PyObject *path, const char *mode)
  737. {
  738. FILE *f;
  739. #ifdef MS_WINDOWS
  740. wchar_t *wpath;
  741. wchar_t wmode[10];
  742. int usize;
  743. if (!PyUnicode_Check(path)) {
  744. PyErr_Format(PyExc_TypeError,
  745. "str file path expected under Windows, got %R",
  746. Py_TYPE(path));
  747. return NULL;
  748. }
  749. wpath = PyUnicode_AsUnicode(path);
  750. if (wpath == NULL)
  751. return NULL;
  752. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  753. if (usize == 0)
  754. return NULL;
  755. f = _wfopen(wpath, wmode);
  756. #else
  757. PyObject *bytes;
  758. if (!PyUnicode_FSConverter(path, &bytes))
  759. return NULL;
  760. f = fopen(PyBytes_AS_STRING(bytes), mode);
  761. Py_DECREF(bytes);
  762. #endif
  763. if (f == NULL)
  764. return NULL;
  765. if (make_non_inheritable(fileno(f)) < 0) {
  766. fclose(f);
  767. return NULL;
  768. }
  769. return f;
  770. }
  771. #ifdef HAVE_READLINK
  772. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  773. the result from the locale encoding. Return -1 on error. */
  774. int
  775. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  776. {
  777. char *cpath;
  778. char cbuf[PATH_MAX];
  779. wchar_t *wbuf;
  780. int res;
  781. size_t r1;
  782. cpath = _Py_wchar2char(path, NULL);
  783. if (cpath == NULL) {
  784. errno = EINVAL;
  785. return -1;
  786. }
  787. res = (int)readlink(cpath, cbuf, PATH_MAX);
  788. PyMem_Free(cpath);
  789. if (res == -1)
  790. return -1;
  791. if (res == PATH_MAX) {
  792. errno = EINVAL;
  793. return -1;
  794. }
  795. cbuf[res] = '\0'; /* buf will be null terminated */
  796. wbuf = _Py_char2wchar(cbuf, &r1);
  797. if (wbuf == NULL) {
  798. errno = EINVAL;
  799. return -1;
  800. }
  801. if (bufsiz <= r1) {
  802. PyMem_RawFree(wbuf);
  803. errno = EINVAL;
  804. return -1;
  805. }
  806. wcsncpy(buf, wbuf, bufsiz);
  807. PyMem_RawFree(wbuf);
  808. return (int)r1;
  809. }
  810. #endif
  811. #ifdef HAVE_REALPATH
  812. /* Return the canonicalized absolute pathname. Encode path to the locale
  813. encoding, decode the result from the locale encoding.
  814. Return NULL on error. */
  815. wchar_t*
  816. _Py_wrealpath(const wchar_t *path,
  817. wchar_t *resolved_path, size_t resolved_path_size)
  818. {
  819. char *cpath;
  820. char cresolved_path[PATH_MAX];
  821. wchar_t *wresolved_path;
  822. char *res;
  823. size_t r;
  824. cpath = _Py_wchar2char(path, NULL);
  825. if (cpath == NULL) {
  826. errno = EINVAL;
  827. return NULL;
  828. }
  829. res = realpath(cpath, cresolved_path);
  830. PyMem_Free(cpath);
  831. if (res == NULL)
  832. return NULL;
  833. wresolved_path = _Py_char2wchar(cresolved_path, &r);
  834. if (wresolved_path == NULL) {
  835. errno = EINVAL;
  836. return NULL;
  837. }
  838. if (resolved_path_size <= r) {
  839. PyMem_RawFree(wresolved_path);
  840. errno = EINVAL;
  841. return NULL;
  842. }
  843. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  844. PyMem_RawFree(wresolved_path);
  845. return resolved_path;
  846. }
  847. #endif
  848. /* Get the current directory. size is the buffer size in wide characters
  849. including the null character. Decode the path from the locale encoding.
  850. Return NULL on error. */
  851. wchar_t*
  852. _Py_wgetcwd(wchar_t *buf, size_t size)
  853. {
  854. #ifdef MS_WINDOWS
  855. int isize = (int)Py_MIN(size, INT_MAX);
  856. return _wgetcwd(buf, isize);
  857. #else
  858. char fname[PATH_MAX];
  859. wchar_t *wname;
  860. size_t len;
  861. if (getcwd(fname, PATH_MAX) == NULL)
  862. return NULL;
  863. wname = _Py_char2wchar(fname, &len);
  864. if (wname == NULL)
  865. return NULL;
  866. if (size <= len) {
  867. PyMem_RawFree(wname);
  868. return NULL;
  869. }
  870. wcsncpy(buf, wname, size);
  871. PyMem_RawFree(wname);
  872. return buf;
  873. #endif
  874. }
  875. /* Duplicate a file descriptor. The new file descriptor is created as
  876. non-inheritable. Return a new file descriptor on success, raise an OSError
  877. exception and return -1 on error.
  878. The GIL is released to call dup(). The caller must hold the GIL. */
  879. int
  880. _Py_dup(int fd)
  881. {
  882. #ifdef MS_WINDOWS
  883. HANDLE handle;
  884. DWORD ftype;
  885. #endif
  886. if (!_PyVerify_fd(fd)) {
  887. PyErr_SetFromErrno(PyExc_OSError);
  888. return -1;
  889. }
  890. #ifdef MS_WINDOWS
  891. handle = (HANDLE)_get_osfhandle(fd);
  892. if (handle == INVALID_HANDLE_VALUE) {
  893. PyErr_SetFromWindowsErr(0);
  894. return -1;
  895. }
  896. /* get the file type, ignore the error if it failed */
  897. ftype = GetFileType(handle);
  898. Py_BEGIN_ALLOW_THREADS
  899. fd = dup(fd);
  900. Py_END_ALLOW_THREADS
  901. if (fd < 0) {
  902. PyErr_SetFromErrno(PyExc_OSError);
  903. return -1;
  904. }
  905. /* Character files like console cannot be make non-inheritable */
  906. if (ftype != FILE_TYPE_CHAR) {
  907. if (_Py_set_inheritable(fd, 0, NULL) < 0) {
  908. close(fd);
  909. return -1;
  910. }
  911. }
  912. #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
  913. Py_BEGIN_ALLOW_THREADS
  914. fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
  915. Py_END_ALLOW_THREADS
  916. if (fd < 0) {
  917. PyErr_SetFromErrno(PyExc_OSError);
  918. return -1;
  919. }
  920. #else
  921. Py_BEGIN_ALLOW_THREADS
  922. fd = dup(fd);
  923. Py_END_ALLOW_THREADS
  924. if (fd < 0) {
  925. PyErr_SetFromErrno(PyExc_OSError);
  926. return -1;
  927. }
  928. if (_Py_set_inheritable(fd, 0, NULL) < 0) {
  929. close(fd);
  930. return -1;
  931. }
  932. #endif
  933. return fd;
  934. }