You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1077 lines
28 KiB

13 years ago
  1. #include "Python.h"
  2. #include "osdefs.h"
  3. #include <locale.h>
  4. #ifdef MS_WINDOWS
  5. # include <windows.h>
  6. #endif
  7. #ifdef HAVE_LANGINFO_H
  8. #include <langinfo.h>
  9. #endif
  10. #ifdef HAVE_SYS_IOCTL_H
  11. #include <sys/ioctl.h>
  12. #endif
  13. #ifdef HAVE_FCNTL_H
  14. #include <fcntl.h>
  15. #endif /* HAVE_FCNTL_H */
  16. #ifdef __APPLE__
  17. extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
  18. #endif
  19. #ifdef O_CLOEXEC
  20. /* Does open() support the O_CLOEXEC flag? Possible values:
  21. -1: unknown
  22. 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
  23. 1: open() supports O_CLOEXEC flag, close-on-exec is set
  24. The flag is used by _Py_open(), io.FileIO and os.open() */
  25. int _Py_open_cloexec_works = -1;
  26. #endif
  27. PyObject *
  28. _Py_device_encoding(int fd)
  29. {
  30. #if defined(MS_WINDOWS)
  31. UINT cp;
  32. #endif
  33. if (!_PyVerify_fd(fd) || !isatty(fd)) {
  34. Py_RETURN_NONE;
  35. }
  36. #if defined(MS_WINDOWS)
  37. if (fd == 0)
  38. cp = GetConsoleCP();
  39. else if (fd == 1 || fd == 2)
  40. cp = GetConsoleOutputCP();
  41. else
  42. cp = 0;
  43. /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
  44. has no console */
  45. if (cp != 0)
  46. return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
  47. #elif defined(CODESET)
  48. {
  49. char *codeset = nl_langinfo(CODESET);
  50. if (codeset != NULL && codeset[0] != 0)
  51. return PyUnicode_FromString(codeset);
  52. }
  53. #endif
  54. Py_RETURN_NONE;
  55. }
  56. #if !defined(__APPLE__) && !defined(MS_WINDOWS)
  57. extern int _Py_normalize_encoding(const char *, char *, size_t);
  58. /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
  59. On these operating systems, nl_langinfo(CODESET) announces an alias of the
  60. ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
  61. ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
  62. locale.getpreferredencoding() codec. For example, if command line arguments
  63. are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
  64. UnicodeEncodeError instead of retrieving the original byte string.
  65. The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
  66. nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
  67. one byte in range 0x80-0xff can be decoded from the locale encoding. The
  68. workaround is also enabled on error, for example if getting the locale
  69. failed.
  70. Values of force_ascii:
  71. 1: the workaround is used: _Py_wchar2char() uses
  72. encode_ascii_surrogateescape() and _Py_char2wchar() uses
  73. decode_ascii_surrogateescape()
  74. 0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
  75. _Py_char2wchar() uses mbstowcs()
  76. -1: unknown, need to call check_force_ascii() to get the value
  77. */
  78. static int force_ascii = -1;
  79. static int
  80. check_force_ascii(void)
  81. {
  82. char *loc;
  83. #if defined(HAVE_LANGINFO_H) && defined(CODESET)
  84. char *codeset, **alias;
  85. char encoding[100];
  86. int is_ascii;
  87. unsigned int i;
  88. char* ascii_aliases[] = {
  89. "ascii",
  90. "646",
  91. "ansi-x3.4-1968",
  92. "ansi-x3-4-1968",
  93. "ansi-x3.4-1986",
  94. "cp367",
  95. "csascii",
  96. "ibm367",
  97. "iso646-us",
  98. "iso-646.irv-1991",
  99. "iso-ir-6",
  100. "us",
  101. "us-ascii",
  102. NULL
  103. };
  104. #endif
  105. loc = setlocale(LC_CTYPE, NULL);
  106. if (loc == NULL)
  107. goto error;
  108. if (strcmp(loc, "C") != 0) {
  109. /* the LC_CTYPE locale is different than C */
  110. return 0;
  111. }
  112. #if defined(HAVE_LANGINFO_H) && defined(CODESET)
  113. codeset = nl_langinfo(CODESET);
  114. if (!codeset || codeset[0] == '\0') {
  115. /* CODESET is not set or empty */
  116. goto error;
  117. }
  118. if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
  119. goto error;
  120. is_ascii = 0;
  121. for (alias=ascii_aliases; *alias != NULL; alias++) {
  122. if (strcmp(encoding, *alias) == 0) {
  123. is_ascii = 1;
  124. break;
  125. }
  126. }
  127. if (!is_ascii) {
  128. /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
  129. return 0;
  130. }
  131. for (i=0x80; i<0xff; i++) {
  132. unsigned char ch;
  133. wchar_t wch;
  134. size_t res;
  135. ch = (unsigned char)i;
  136. res = mbstowcs(&wch, (char*)&ch, 1);
  137. if (res != (size_t)-1) {
  138. /* decoding a non-ASCII character from the locale encoding succeed:
  139. the locale encoding is not ASCII, force ASCII */
  140. return 1;
  141. }
  142. }
  143. /* None of the bytes in the range 0x80-0xff can be decoded from the locale
  144. encoding: the locale encoding is really ASCII */
  145. return 0;
  146. #else
  147. /* nl_langinfo(CODESET) is not available: always force ASCII */
  148. return 1;
  149. #endif
  150. error:
  151. /* if an error occured, force the ASCII encoding */
  152. return 1;
  153. }
  154. static char*
  155. encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
  156. {
  157. char *result = NULL, *out;
  158. size_t len, i;
  159. wchar_t ch;
  160. if (error_pos != NULL)
  161. *error_pos = (size_t)-1;
  162. len = wcslen(text);
  163. result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
  164. if (result == NULL)
  165. return NULL;
  166. out = result;
  167. for (i=0; i<len; i++) {
  168. ch = text[i];
  169. if (ch <= 0x7f) {
  170. /* ASCII character */
  171. *out++ = (char)ch;
  172. }
  173. else if (0xdc80 <= ch && ch <= 0xdcff) {
  174. /* UTF-8b surrogate */
  175. *out++ = (char)(ch - 0xdc00);
  176. }
  177. else {
  178. if (error_pos != NULL)
  179. *error_pos = i;
  180. PyMem_Free(result);
  181. return NULL;
  182. }
  183. }
  184. *out = '\0';
  185. return result;
  186. }
  187. #endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
  188. #if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
  189. static wchar_t*
  190. decode_ascii_surrogateescape(const char *arg, size_t *size)
  191. {
  192. wchar_t *res;
  193. unsigned char *in;
  194. wchar_t *out;
  195. size_t argsize = strlen(arg) + 1;
  196. if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
  197. return NULL;
  198. res = PyMem_RawMalloc(argsize*sizeof(wchar_t));
  199. if (!res)
  200. return NULL;
  201. in = (unsigned char*)arg;
  202. out = res;
  203. while(*in)
  204. if(*in < 128)
  205. *out++ = *in++;
  206. else
  207. *out++ = 0xdc00 + *in++;
  208. *out = 0;
  209. if (size != NULL)
  210. *size = out - res;
  211. return res;
  212. }
  213. #endif
  214. /* Decode a byte string from the locale encoding with the
  215. surrogateescape error handler (undecodable bytes are decoded as characters
  216. in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
  217. character, escape the bytes using the surrogateescape error handler instead
  218. of decoding them.
  219. Use _Py_wchar2char() to encode the character string back to a byte string.
  220. Return a pointer to a newly allocated wide character string (use
  221. PyMem_RawFree() to free the memory) and write the number of written wide
  222. characters excluding the null character into *size if size is not NULL, or
  223. NULL on error (decoding or memory allocation error). If size is not NULL,
  224. *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
  225. error.
  226. Conversion errors should never happen, unless there is a bug in the C
  227. library. */
  228. wchar_t*
  229. _Py_char2wchar(const char* arg, size_t *size)
  230. {
  231. #ifdef __APPLE__
  232. wchar_t *wstr;
  233. wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
  234. if (size != NULL) {
  235. if (wstr != NULL)
  236. *size = wcslen(wstr);
  237. else
  238. *size = (size_t)-1;
  239. }
  240. return wstr;
  241. #else
  242. wchar_t *res;
  243. size_t argsize;
  244. size_t count;
  245. #ifdef HAVE_MBRTOWC
  246. unsigned char *in;
  247. wchar_t *out;
  248. mbstate_t mbs;
  249. #endif
  250. #ifndef MS_WINDOWS
  251. if (force_ascii == -1)
  252. force_ascii = check_force_ascii();
  253. if (force_ascii) {
  254. /* force ASCII encoding to workaround mbstowcs() issue */
  255. res = decode_ascii_surrogateescape(arg, size);
  256. if (res == NULL)
  257. goto oom;
  258. return res;
  259. }
  260. #endif
  261. #ifdef HAVE_BROKEN_MBSTOWCS
  262. /* Some platforms have a broken implementation of
  263. * mbstowcs which does not count the characters that
  264. * would result from conversion. Use an upper bound.
  265. */
  266. argsize = strlen(arg);
  267. #else
  268. argsize = mbstowcs(NULL, arg, 0);
  269. #endif
  270. if (argsize != (size_t)-1) {
  271. if (argsize == PY_SSIZE_T_MAX)
  272. goto oom;
  273. argsize += 1;
  274. if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
  275. goto oom;
  276. res = (wchar_t *)PyMem_RawMalloc(argsize*sizeof(wchar_t));
  277. if (!res)
  278. goto oom;
  279. count = mbstowcs(res, arg, argsize);
  280. if (count != (size_t)-1) {
  281. wchar_t *tmp;
  282. /* Only use the result if it contains no
  283. surrogate characters. */
  284. for (tmp = res; *tmp != 0 &&
  285. !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
  286. ;
  287. if (*tmp == 0) {
  288. if (size != NULL)
  289. *size = count;
  290. return res;
  291. }
  292. }
  293. PyMem_RawFree(res);
  294. }
  295. /* Conversion failed. Fall back to escaping with surrogateescape. */
  296. #ifdef HAVE_MBRTOWC
  297. /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
  298. /* Overallocate; as multi-byte characters are in the argument, the
  299. actual output could use less memory. */
  300. argsize = strlen(arg) + 1;
  301. if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
  302. goto oom;
  303. res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
  304. if (!res)
  305. goto oom;
  306. in = (unsigned char*)arg;
  307. out = res;
  308. memset(&mbs, 0, sizeof mbs);
  309. while (argsize) {
  310. size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
  311. if (converted == 0)
  312. /* Reached end of string; null char stored. */
  313. break;
  314. if (converted == (size_t)-2) {
  315. /* Incomplete character. This should never happen,
  316. since we provide everything that we have -
  317. unless there is a bug in the C library, or I
  318. misunderstood how mbrtowc works. */
  319. PyMem_RawFree(res);
  320. if (size != NULL)
  321. *size = (size_t)-2;
  322. return NULL;
  323. }
  324. if (converted == (size_t)-1) {
  325. /* Conversion error. Escape as UTF-8b, and start over
  326. in the initial shift state. */
  327. *out++ = 0xdc00 + *in++;
  328. argsize--;
  329. memset(&mbs, 0, sizeof mbs);
  330. continue;
  331. }
  332. if (Py_UNICODE_IS_SURROGATE(*out)) {
  333. /* Surrogate character. Escape the original
  334. byte sequence with surrogateescape. */
  335. argsize -= converted;
  336. while (converted--)
  337. *out++ = 0xdc00 + *in++;
  338. continue;
  339. }
  340. /* successfully converted some bytes */
  341. in += converted;
  342. argsize -= converted;
  343. out++;
  344. }
  345. if (size != NULL)
  346. *size = out - res;
  347. #else /* HAVE_MBRTOWC */
  348. /* Cannot use C locale for escaping; manually escape as if charset
  349. is ASCII (i.e. escape all bytes > 128. This will still roundtrip
  350. correctly in the locale's charset, which must be an ASCII superset. */
  351. res = decode_ascii_surrogateescape(arg, size);
  352. if (res == NULL)
  353. goto oom;
  354. #endif /* HAVE_MBRTOWC */
  355. return res;
  356. oom:
  357. if (size != NULL)
  358. *size = (size_t)-1;
  359. return NULL;
  360. #endif /* __APPLE__ */
  361. }
  362. /* Encode a (wide) character string to the locale encoding with the
  363. surrogateescape error handler (characters in range U+DC80..U+DCFF are
  364. converted to bytes 0x80..0xFF).
  365. This function is the reverse of _Py_char2wchar().
  366. Return a pointer to a newly allocated byte string (use PyMem_Free() to free
  367. the memory), or NULL on encoding or memory allocation error.
  368. If error_pos is not NULL: *error_pos is the index of the invalid character
  369. on encoding error, or (size_t)-1 otherwise. */
  370. char*
  371. _Py_wchar2char(const wchar_t *text, size_t *error_pos)
  372. {
  373. #ifdef __APPLE__
  374. Py_ssize_t len;
  375. PyObject *unicode, *bytes = NULL;
  376. char *cpath;
  377. unicode = PyUnicode_FromWideChar(text, wcslen(text));
  378. if (unicode == NULL)
  379. return NULL;
  380. bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
  381. Py_DECREF(unicode);
  382. if (bytes == NULL) {
  383. PyErr_Clear();
  384. if (error_pos != NULL)
  385. *error_pos = (size_t)-1;
  386. return NULL;
  387. }
  388. len = PyBytes_GET_SIZE(bytes);
  389. cpath = PyMem_Malloc(len+1);
  390. if (cpath == NULL) {
  391. PyErr_Clear();
  392. Py_DECREF(bytes);
  393. if (error_pos != NULL)
  394. *error_pos = (size_t)-1;
  395. return NULL;
  396. }
  397. memcpy(cpath, PyBytes_AsString(bytes), len + 1);
  398. Py_DECREF(bytes);
  399. return cpath;
  400. #else /* __APPLE__ */
  401. const size_t len = wcslen(text);
  402. char *result = NULL, *bytes = NULL;
  403. size_t i, size, converted;
  404. wchar_t c, buf[2];
  405. #ifndef MS_WINDOWS
  406. if (force_ascii == -1)
  407. force_ascii = check_force_ascii();
  408. if (force_ascii)
  409. return encode_ascii_surrogateescape(text, error_pos);
  410. #endif
  411. /* The function works in two steps:
  412. 1. compute the length of the output buffer in bytes (size)
  413. 2. outputs the bytes */
  414. size = 0;
  415. buf[1] = 0;
  416. while (1) {
  417. for (i=0; i < len; i++) {
  418. c = text[i];
  419. if (c >= 0xdc80 && c <= 0xdcff) {
  420. /* UTF-8b surrogate */
  421. if (bytes != NULL) {
  422. *bytes++ = c - 0xdc00;
  423. size--;
  424. }
  425. else
  426. size++;
  427. continue;
  428. }
  429. else {
  430. buf[0] = c;
  431. if (bytes != NULL)
  432. converted = wcstombs(bytes, buf, size);
  433. else
  434. converted = wcstombs(NULL, buf, 0);
  435. if (converted == (size_t)-1) {
  436. if (result != NULL)
  437. PyMem_Free(result);
  438. if (error_pos != NULL)
  439. *error_pos = i;
  440. return NULL;
  441. }
  442. if (bytes != NULL) {
  443. bytes += converted;
  444. size -= converted;
  445. }
  446. else
  447. size += converted;
  448. }
  449. }
  450. if (result != NULL) {
  451. *bytes = '\0';
  452. break;
  453. }
  454. size += 1; /* nul byte at the end */
  455. result = PyMem_Malloc(size);
  456. if (result == NULL) {
  457. if (error_pos != NULL)
  458. *error_pos = (size_t)-1;
  459. return NULL;
  460. }
  461. bytes = result;
  462. }
  463. return result;
  464. #endif /* __APPLE__ */
  465. }
  466. /* In principle, this should use HAVE__WSTAT, and _wstat
  467. should be detected by autoconf. However, no current
  468. POSIX system provides that function, so testing for
  469. it is pointless.
  470. Not sure whether the MS_WINDOWS guards are necessary:
  471. perhaps for cygwin/mingw builds?
  472. */
  473. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  474. /* Get file status. Encode the path to the locale encoding. */
  475. int
  476. _Py_wstat(const wchar_t* path, struct stat *buf)
  477. {
  478. int err;
  479. char *fname;
  480. fname = _Py_wchar2char(path, NULL);
  481. if (fname == NULL) {
  482. errno = EINVAL;
  483. return -1;
  484. }
  485. err = stat(fname, buf);
  486. PyMem_Free(fname);
  487. return err;
  488. }
  489. #endif
  490. #ifdef HAVE_STAT
  491. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  492. call stat() otherwise. Only fill st_mode attribute on Windows.
  493. Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
  494. raised. */
  495. int
  496. _Py_stat(PyObject *path, struct stat *statbuf)
  497. {
  498. #ifdef MS_WINDOWS
  499. int err;
  500. struct _stat wstatbuf;
  501. wchar_t *wpath;
  502. wpath = PyUnicode_AsUnicode(path);
  503. if (wpath == NULL)
  504. return -2;
  505. err = _wstat(wpath, &wstatbuf);
  506. if (!err)
  507. statbuf->st_mode = wstatbuf.st_mode;
  508. return err;
  509. #else
  510. int ret;
  511. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  512. if (bytes == NULL)
  513. return -2;
  514. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  515. Py_DECREF(bytes);
  516. return ret;
  517. #endif
  518. }
  519. #endif
  520. static int
  521. get_inheritable(int fd, int raise)
  522. {
  523. #ifdef MS_WINDOWS
  524. HANDLE handle;
  525. DWORD flags;
  526. if (!_PyVerify_fd(fd)) {
  527. if (raise)
  528. PyErr_SetFromErrno(PyExc_OSError);
  529. return -1;
  530. }
  531. handle = (HANDLE)_get_osfhandle(fd);
  532. if (handle == INVALID_HANDLE_VALUE) {
  533. if (raise)
  534. PyErr_SetFromErrno(PyExc_OSError);
  535. return -1;
  536. }
  537. if (!GetHandleInformation(handle, &flags)) {
  538. if (raise)
  539. PyErr_SetFromWindowsErr(0);
  540. return -1;
  541. }
  542. return (flags & HANDLE_FLAG_INHERIT);
  543. #else
  544. int flags;
  545. flags = fcntl(fd, F_GETFD, 0);
  546. if (flags == -1) {
  547. if (raise)
  548. PyErr_SetFromErrno(PyExc_OSError);
  549. return -1;
  550. }
  551. return !(flags & FD_CLOEXEC);
  552. #endif
  553. }
  554. /* Get the inheritable flag of the specified file descriptor.
  555. Return 1 if the file descriptor can be inherited, 0 if it cannot,
  556. raise an exception and return -1 on error. */
  557. int
  558. _Py_get_inheritable(int fd)
  559. {
  560. return get_inheritable(fd, 1);
  561. }
  562. static int
  563. set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
  564. {
  565. #ifdef MS_WINDOWS
  566. HANDLE handle;
  567. DWORD flags;
  568. #else
  569. #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
  570. static int ioctl_works = -1;
  571. int request;
  572. int err;
  573. #endif
  574. int flags;
  575. int res;
  576. #endif
  577. /* atomic_flag_works can only be used to make the file descriptor
  578. non-inheritable */
  579. assert(!(atomic_flag_works != NULL && inheritable));
  580. if (atomic_flag_works != NULL && !inheritable) {
  581. if (*atomic_flag_works == -1) {
  582. int isInheritable = get_inheritable(fd, raise);
  583. if (isInheritable == -1)
  584. return -1;
  585. *atomic_flag_works = !isInheritable;
  586. }
  587. if (*atomic_flag_works)
  588. return 0;
  589. }
  590. #ifdef MS_WINDOWS
  591. if (!_PyVerify_fd(fd)) {
  592. if (raise)
  593. PyErr_SetFromErrno(PyExc_OSError);
  594. return -1;
  595. }
  596. handle = (HANDLE)_get_osfhandle(fd);
  597. if (handle == INVALID_HANDLE_VALUE) {
  598. if (raise)
  599. PyErr_SetFromErrno(PyExc_OSError);
  600. return -1;
  601. }
  602. if (inheritable)
  603. flags = HANDLE_FLAG_INHERIT;
  604. else
  605. flags = 0;
  606. if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
  607. if (raise)
  608. PyErr_SetFromWindowsErr(0);
  609. return -1;
  610. }
  611. return 0;
  612. #else
  613. #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
  614. if (ioctl_works != 0) {
  615. /* fast-path: ioctl() only requires one syscall */
  616. if (inheritable)
  617. request = FIONCLEX;
  618. else
  619. request = FIOCLEX;
  620. err = ioctl(fd, request, NULL);
  621. if (!err) {
  622. ioctl_works = 1;
  623. return 0;
  624. }
  625. if (errno != ENOTTY) {
  626. if (raise)
  627. PyErr_SetFromErrno(PyExc_OSError);
  628. return -1;
  629. }
  630. else {
  631. /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
  632. device". The ioctl is declared but not supported by the kernel.
  633. Remember that ioctl() doesn't work. It is the case on
  634. Illumos-based OS for example. */
  635. ioctl_works = 0;
  636. }
  637. /* fallback to fcntl() if ioctl() does not work */
  638. }
  639. #endif
  640. /* slow-path: fcntl() requires two syscalls */
  641. flags = fcntl(fd, F_GETFD);
  642. if (flags < 0) {
  643. if (raise)
  644. PyErr_SetFromErrno(PyExc_OSError);
  645. return -1;
  646. }
  647. if (inheritable)
  648. flags &= ~FD_CLOEXEC;
  649. else
  650. flags |= FD_CLOEXEC;
  651. res = fcntl(fd, F_SETFD, flags);
  652. if (res < 0) {
  653. if (raise)
  654. PyErr_SetFromErrno(PyExc_OSError);
  655. return -1;
  656. }
  657. return 0;
  658. #endif
  659. }
  660. /* Make the file descriptor non-inheritable.
  661. Return 0 on success, set errno and return -1 on error. */
  662. static int
  663. make_non_inheritable(int fd)
  664. {
  665. return set_inheritable(fd, 0, 0, NULL);
  666. }
  667. /* Set the inheritable flag of the specified file descriptor.
  668. On success: return 0, on error: raise an exception if raise is nonzero
  669. and return -1.
  670. If atomic_flag_works is not NULL:
  671. * if *atomic_flag_works==-1, check if the inheritable is set on the file
  672. descriptor: if yes, set *atomic_flag_works to 1, otherwise set to 0 and
  673. set the inheritable flag
  674. * if *atomic_flag_works==1: do nothing
  675. * if *atomic_flag_works==0: set inheritable flag to False
  676. Set atomic_flag_works to NULL if no atomic flag was used to create the
  677. file descriptor.
  678. atomic_flag_works can only be used to make a file descriptor
  679. non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
  680. int
  681. _Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
  682. {
  683. return set_inheritable(fd, inheritable, 1, atomic_flag_works);
  684. }
  685. /* Open a file with the specified flags (wrapper to open() function).
  686. The file descriptor is created non-inheritable. */
  687. int
  688. _Py_open(const char *pathname, int flags)
  689. {
  690. int fd;
  691. #ifdef MS_WINDOWS
  692. fd = open(pathname, flags | O_NOINHERIT);
  693. if (fd < 0)
  694. return fd;
  695. #else
  696. int *atomic_flag_works;
  697. #ifdef O_CLOEXEC
  698. atomic_flag_works = &_Py_open_cloexec_works;
  699. flags |= O_CLOEXEC;
  700. #else
  701. atomic_flag_works = NULL;
  702. #endif
  703. fd = open(pathname, flags);
  704. if (fd < 0)
  705. return fd;
  706. if (set_inheritable(fd, 0, 0, atomic_flag_works) < 0) {
  707. close(fd);
  708. return -1;
  709. }
  710. #endif /* !MS_WINDOWS */
  711. return fd;
  712. }
  713. /* Open a file. Use _wfopen() on Windows, encode the path to the locale
  714. encoding and use fopen() otherwise. The file descriptor is created
  715. non-inheritable. */
  716. FILE *
  717. _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  718. {
  719. FILE *f;
  720. #ifndef MS_WINDOWS
  721. char *cpath;
  722. char cmode[10];
  723. size_t r;
  724. r = wcstombs(cmode, mode, 10);
  725. if (r == (size_t)-1 || r >= 10) {
  726. errno = EINVAL;
  727. return NULL;
  728. }
  729. cpath = _Py_wchar2char(path, NULL);
  730. if (cpath == NULL)
  731. return NULL;
  732. f = fopen(cpath, cmode);
  733. PyMem_Free(cpath);
  734. #else
  735. f = _wfopen(path, mode);
  736. #endif
  737. if (f == NULL)
  738. return NULL;
  739. if (make_non_inheritable(fileno(f)) < 0) {
  740. fclose(f);
  741. return NULL;
  742. }
  743. return f;
  744. }
  745. /* Wrapper to fopen(). The file descriptor is created non-inheritable. */
  746. FILE*
  747. _Py_fopen(const char *pathname, const char *mode)
  748. {
  749. FILE *f = fopen(pathname, mode);
  750. if (f == NULL)
  751. return NULL;
  752. if (make_non_inheritable(fileno(f)) < 0) {
  753. fclose(f);
  754. return NULL;
  755. }
  756. return f;
  757. }
  758. /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
  759. encoding and call fopen() otherwise. The file descriptor is created
  760. non-inheritable.
  761. Return the new file object on success, or NULL if the file cannot be open or
  762. (if PyErr_Occurred()) on unicode error. */
  763. FILE*
  764. _Py_fopen_obj(PyObject *path, const char *mode)
  765. {
  766. FILE *f;
  767. #ifdef MS_WINDOWS
  768. wchar_t *wpath;
  769. wchar_t wmode[10];
  770. int usize;
  771. if (!PyUnicode_Check(path)) {
  772. PyErr_Format(PyExc_TypeError,
  773. "str file path expected under Windows, got %R",
  774. Py_TYPE(path));
  775. return NULL;
  776. }
  777. wpath = PyUnicode_AsUnicode(path);
  778. if (wpath == NULL)
  779. return NULL;
  780. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  781. if (usize == 0)
  782. return NULL;
  783. f = _wfopen(wpath, wmode);
  784. #else
  785. PyObject *bytes;
  786. if (!PyUnicode_FSConverter(path, &bytes))
  787. return NULL;
  788. f = fopen(PyBytes_AS_STRING(bytes), mode);
  789. Py_DECREF(bytes);
  790. #endif
  791. if (f == NULL)
  792. return NULL;
  793. if (make_non_inheritable(fileno(f)) < 0) {
  794. fclose(f);
  795. return NULL;
  796. }
  797. return f;
  798. }
  799. #ifdef HAVE_READLINK
  800. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  801. the result from the locale encoding. Return -1 on error. */
  802. int
  803. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  804. {
  805. char *cpath;
  806. char cbuf[MAXPATHLEN];
  807. wchar_t *wbuf;
  808. int res;
  809. size_t r1;
  810. cpath = _Py_wchar2char(path, NULL);
  811. if (cpath == NULL) {
  812. errno = EINVAL;
  813. return -1;
  814. }
  815. res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
  816. PyMem_Free(cpath);
  817. if (res == -1)
  818. return -1;
  819. if (res == Py_ARRAY_LENGTH(cbuf)) {
  820. errno = EINVAL;
  821. return -1;
  822. }
  823. cbuf[res] = '\0'; /* buf will be null terminated */
  824. wbuf = _Py_char2wchar(cbuf, &r1);
  825. if (wbuf == NULL) {
  826. errno = EINVAL;
  827. return -1;
  828. }
  829. if (bufsiz <= r1) {
  830. PyMem_RawFree(wbuf);
  831. errno = EINVAL;
  832. return -1;
  833. }
  834. wcsncpy(buf, wbuf, bufsiz);
  835. PyMem_RawFree(wbuf);
  836. return (int)r1;
  837. }
  838. #endif
  839. #ifdef HAVE_REALPATH
  840. /* Return the canonicalized absolute pathname. Encode path to the locale
  841. encoding, decode the result from the locale encoding.
  842. Return NULL on error. */
  843. wchar_t*
  844. _Py_wrealpath(const wchar_t *path,
  845. wchar_t *resolved_path, size_t resolved_path_size)
  846. {
  847. char *cpath;
  848. char cresolved_path[MAXPATHLEN];
  849. wchar_t *wresolved_path;
  850. char *res;
  851. size_t r;
  852. cpath = _Py_wchar2char(path, NULL);
  853. if (cpath == NULL) {
  854. errno = EINVAL;
  855. return NULL;
  856. }
  857. res = realpath(cpath, cresolved_path);
  858. PyMem_Free(cpath);
  859. if (res == NULL)
  860. return NULL;
  861. wresolved_path = _Py_char2wchar(cresolved_path, &r);
  862. if (wresolved_path == NULL) {
  863. errno = EINVAL;
  864. return NULL;
  865. }
  866. if (resolved_path_size <= r) {
  867. PyMem_RawFree(wresolved_path);
  868. errno = EINVAL;
  869. return NULL;
  870. }
  871. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  872. PyMem_RawFree(wresolved_path);
  873. return resolved_path;
  874. }
  875. #endif
  876. /* Get the current directory. size is the buffer size in wide characters
  877. including the null character. Decode the path from the locale encoding.
  878. Return NULL on error. */
  879. wchar_t*
  880. _Py_wgetcwd(wchar_t *buf, size_t size)
  881. {
  882. #ifdef MS_WINDOWS
  883. int isize = (int)Py_MIN(size, INT_MAX);
  884. return _wgetcwd(buf, isize);
  885. #else
  886. char fname[MAXPATHLEN];
  887. wchar_t *wname;
  888. size_t len;
  889. if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
  890. return NULL;
  891. wname = _Py_char2wchar(fname, &len);
  892. if (wname == NULL)
  893. return NULL;
  894. if (size <= len) {
  895. PyMem_RawFree(wname);
  896. return NULL;
  897. }
  898. wcsncpy(buf, wname, size);
  899. PyMem_RawFree(wname);
  900. return buf;
  901. #endif
  902. }
  903. /* Duplicate a file descriptor. The new file descriptor is created as
  904. non-inheritable. Return a new file descriptor on success, raise an OSError
  905. exception and return -1 on error.
  906. The GIL is released to call dup(). The caller must hold the GIL. */
  907. int
  908. _Py_dup(int fd)
  909. {
  910. #ifdef MS_WINDOWS
  911. HANDLE handle;
  912. DWORD ftype;
  913. #endif
  914. if (!_PyVerify_fd(fd)) {
  915. PyErr_SetFromErrno(PyExc_OSError);
  916. return -1;
  917. }
  918. #ifdef MS_WINDOWS
  919. handle = (HANDLE)_get_osfhandle(fd);
  920. if (handle == INVALID_HANDLE_VALUE) {
  921. PyErr_SetFromErrno(PyExc_OSError);
  922. return -1;
  923. }
  924. /* get the file type, ignore the error if it failed */
  925. ftype = GetFileType(handle);
  926. Py_BEGIN_ALLOW_THREADS
  927. fd = dup(fd);
  928. Py_END_ALLOW_THREADS
  929. if (fd < 0) {
  930. PyErr_SetFromErrno(PyExc_OSError);
  931. return -1;
  932. }
  933. /* Character files like console cannot be make non-inheritable */
  934. if (ftype != FILE_TYPE_CHAR) {
  935. if (_Py_set_inheritable(fd, 0, NULL) < 0) {
  936. close(fd);
  937. return -1;
  938. }
  939. }
  940. #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
  941. Py_BEGIN_ALLOW_THREADS
  942. fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
  943. Py_END_ALLOW_THREADS
  944. if (fd < 0) {
  945. PyErr_SetFromErrno(PyExc_OSError);
  946. return -1;
  947. }
  948. #else
  949. Py_BEGIN_ALLOW_THREADS
  950. fd = dup(fd);
  951. Py_END_ALLOW_THREADS
  952. if (fd < 0) {
  953. PyErr_SetFromErrno(PyExc_OSError);
  954. return -1;
  955. }
  956. if (_Py_set_inheritable(fd, 0, NULL) < 0) {
  957. close(fd);
  958. return -1;
  959. }
  960. #endif
  961. return fd;
  962. }