You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

531 lines
14 KiB

  1. #include "Python.h"
  2. #include "osdefs.h"
  3. #ifdef MS_WINDOWS
  4. # include <windows.h>
  5. #endif
  6. #ifdef HAVE_LANGINFO_H
  7. #include <langinfo.h>
  8. #endif
  9. #ifdef __APPLE__
  10. extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
  11. #endif
  12. PyObject *
  13. _Py_device_encoding(int fd)
  14. {
  15. #if defined(MS_WINDOWS) || defined(MS_WIN64)
  16. UINT cp;
  17. #endif
  18. if (!_PyVerify_fd(fd) || !isatty(fd)) {
  19. Py_RETURN_NONE;
  20. }
  21. #if defined(MS_WINDOWS) || defined(MS_WIN64)
  22. if (fd == 0)
  23. cp = GetConsoleCP();
  24. else if (fd == 1 || fd == 2)
  25. cp = GetConsoleOutputCP();
  26. else
  27. cp = 0;
  28. /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
  29. has no console */
  30. if (cp != 0)
  31. return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
  32. #elif defined(CODESET)
  33. {
  34. char *codeset = nl_langinfo(CODESET);
  35. if (codeset != NULL && codeset[0] != 0)
  36. return PyUnicode_FromString(codeset);
  37. }
  38. #endif
  39. Py_RETURN_NONE;
  40. }
  41. #ifdef HAVE_STAT
  42. /* Decode a byte string from the locale encoding with the
  43. surrogateescape error handler (undecodable bytes are decoded as characters
  44. in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
  45. character, escape the bytes using the surrogateescape error handler instead
  46. of decoding them.
  47. Use _Py_wchar2char() to encode the character string back to a byte string.
  48. Return a pointer to a newly allocated wide character string (use
  49. PyMem_Free() to free the memory) and write the number of written wide
  50. characters excluding the null character into *size if size is not NULL, or
  51. NULL on error (decoding or memory allocation error). If size is not NULL,
  52. *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
  53. error.
  54. Conversion errors should never happen, unless there is a bug in the C
  55. library. */
  56. wchar_t*
  57. _Py_char2wchar(const char* arg, size_t *size)
  58. {
  59. #ifdef __APPLE__
  60. wchar_t *wstr;
  61. wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
  62. if (wstr == NULL)
  63. return NULL;
  64. if (size != NULL)
  65. *size = wcslen(wstr);
  66. return wstr;
  67. #else
  68. wchar_t *res;
  69. #ifdef HAVE_BROKEN_MBSTOWCS
  70. /* Some platforms have a broken implementation of
  71. * mbstowcs which does not count the characters that
  72. * would result from conversion. Use an upper bound.
  73. */
  74. size_t argsize = strlen(arg);
  75. #else
  76. size_t argsize = mbstowcs(NULL, arg, 0);
  77. #endif
  78. size_t count;
  79. unsigned char *in;
  80. wchar_t *out;
  81. #ifdef HAVE_MBRTOWC
  82. mbstate_t mbs;
  83. #endif
  84. if (argsize != (size_t)-1) {
  85. res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
  86. if (!res)
  87. goto oom;
  88. count = mbstowcs(res, arg, argsize+1);
  89. if (count != (size_t)-1) {
  90. wchar_t *tmp;
  91. /* Only use the result if it contains no
  92. surrogate characters. */
  93. for (tmp = res; *tmp != 0 &&
  94. !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
  95. ;
  96. if (*tmp == 0) {
  97. if (size != NULL)
  98. *size = count;
  99. return res;
  100. }
  101. }
  102. PyMem_Free(res);
  103. }
  104. /* Conversion failed. Fall back to escaping with surrogateescape. */
  105. #ifdef HAVE_MBRTOWC
  106. /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
  107. /* Overallocate; as multi-byte characters are in the argument, the
  108. actual output could use less memory. */
  109. argsize = strlen(arg) + 1;
  110. res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
  111. if (!res)
  112. goto oom;
  113. in = (unsigned char*)arg;
  114. out = res;
  115. memset(&mbs, 0, sizeof mbs);
  116. while (argsize) {
  117. size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
  118. if (converted == 0)
  119. /* Reached end of string; null char stored. */
  120. break;
  121. if (converted == (size_t)-2) {
  122. /* Incomplete character. This should never happen,
  123. since we provide everything that we have -
  124. unless there is a bug in the C library, or I
  125. misunderstood how mbrtowc works. */
  126. PyMem_Free(res);
  127. if (size != NULL)
  128. *size = (size_t)-2;
  129. return NULL;
  130. }
  131. if (converted == (size_t)-1) {
  132. /* Conversion error. Escape as UTF-8b, and start over
  133. in the initial shift state. */
  134. *out++ = 0xdc00 + *in++;
  135. argsize--;
  136. memset(&mbs, 0, sizeof mbs);
  137. continue;
  138. }
  139. if (Py_UNICODE_IS_SURROGATE(*out)) {
  140. /* Surrogate character. Escape the original
  141. byte sequence with surrogateescape. */
  142. argsize -= converted;
  143. while (converted--)
  144. *out++ = 0xdc00 + *in++;
  145. continue;
  146. }
  147. /* successfully converted some bytes */
  148. in += converted;
  149. argsize -= converted;
  150. out++;
  151. }
  152. #else /* HAVE_MBRTOWC */
  153. /* Cannot use C locale for escaping; manually escape as if charset
  154. is ASCII (i.e. escape all bytes > 128. This will still roundtrip
  155. correctly in the locale's charset, which must be an ASCII superset. */
  156. res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
  157. if (!res)
  158. goto oom;
  159. in = (unsigned char*)arg;
  160. out = res;
  161. while(*in)
  162. if(*in < 128)
  163. *out++ = *in++;
  164. else
  165. *out++ = 0xdc00 + *in++;
  166. *out = 0;
  167. #endif /* HAVE_MBRTOWC */
  168. if (size != NULL)
  169. *size = out - res;
  170. return res;
  171. oom:
  172. if (size != NULL)
  173. *size = (size_t)-1;
  174. return NULL;
  175. #endif /* __APPLE__ */
  176. }
  177. /* Encode a (wide) character string to the locale encoding with the
  178. surrogateescape error handler (characters in range U+DC80..U+DCFF are
  179. converted to bytes 0x80..0xFF).
  180. This function is the reverse of _Py_char2wchar().
  181. Return a pointer to a newly allocated byte string (use PyMem_Free() to free
  182. the memory), or NULL on encoding or memory allocation error.
  183. If error_pos is not NULL: *error_pos is the index of the invalid character
  184. on encoding error, or (size_t)-1 otherwise. */
  185. char*
  186. _Py_wchar2char(const wchar_t *text, size_t *error_pos)
  187. {
  188. #ifdef __APPLE__
  189. Py_ssize_t len;
  190. PyObject *unicode, *bytes = NULL;
  191. char *cpath;
  192. unicode = PyUnicode_FromWideChar(text, wcslen(text));
  193. if (unicode == NULL) {
  194. Py_DECREF(unicode);
  195. return NULL;
  196. }
  197. bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
  198. Py_DECREF(unicode);
  199. if (bytes == NULL) {
  200. PyErr_Clear();
  201. return NULL;
  202. }
  203. len = PyBytes_GET_SIZE(bytes);
  204. cpath = PyMem_Malloc(len+1);
  205. if (cpath == NULL) {
  206. Py_DECREF(bytes);
  207. return NULL;
  208. }
  209. memcpy(cpath, PyBytes_AsString(bytes), len + 1);
  210. Py_DECREF(bytes);
  211. return cpath;
  212. #else /* __APPLE__ */
  213. const size_t len = wcslen(text);
  214. char *result = NULL, *bytes = NULL;
  215. size_t i, size, converted;
  216. wchar_t c, buf[2];
  217. if (error_pos != NULL)
  218. *error_pos = (size_t)-1;
  219. /* The function works in two steps:
  220. 1. compute the length of the output buffer in bytes (size)
  221. 2. outputs the bytes */
  222. size = 0;
  223. buf[1] = 0;
  224. while (1) {
  225. for (i=0; i < len; i++) {
  226. c = text[i];
  227. if (c >= 0xdc80 && c <= 0xdcff) {
  228. /* UTF-8b surrogate */
  229. if (bytes != NULL) {
  230. *bytes++ = c - 0xdc00;
  231. size--;
  232. }
  233. else
  234. size++;
  235. continue;
  236. }
  237. else {
  238. buf[0] = c;
  239. if (bytes != NULL)
  240. converted = wcstombs(bytes, buf, size);
  241. else
  242. converted = wcstombs(NULL, buf, 0);
  243. if (converted == (size_t)-1) {
  244. if (result != NULL)
  245. PyMem_Free(result);
  246. if (error_pos != NULL)
  247. *error_pos = i;
  248. return NULL;
  249. }
  250. if (bytes != NULL) {
  251. bytes += converted;
  252. size -= converted;
  253. }
  254. else
  255. size += converted;
  256. }
  257. }
  258. if (result != NULL) {
  259. *bytes = 0;
  260. break;
  261. }
  262. size += 1; /* nul byte at the end */
  263. result = PyMem_Malloc(size);
  264. if (result == NULL)
  265. return NULL;
  266. bytes = result;
  267. }
  268. return result;
  269. #endif /* __APPLE__ */
  270. }
  271. /* In principle, this should use HAVE__WSTAT, and _wstat
  272. should be detected by autoconf. However, no current
  273. POSIX system provides that function, so testing for
  274. it is pointless.
  275. Not sure whether the MS_WINDOWS guards are necessary:
  276. perhaps for cygwin/mingw builds?
  277. */
  278. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  279. /* Get file status. Encode the path to the locale encoding. */
  280. int
  281. _Py_wstat(const wchar_t* path, struct stat *buf)
  282. {
  283. int err;
  284. char *fname;
  285. fname = _Py_wchar2char(path, NULL);
  286. if (fname == NULL) {
  287. errno = EINVAL;
  288. return -1;
  289. }
  290. err = stat(fname, buf);
  291. PyMem_Free(fname);
  292. return err;
  293. }
  294. #endif
  295. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  296. call stat() otherwise. Only fill st_mode attribute on Windows.
  297. Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
  298. raised. */
  299. int
  300. _Py_stat(PyObject *path, struct stat *statbuf)
  301. {
  302. #ifdef MS_WINDOWS
  303. int err;
  304. struct _stat wstatbuf;
  305. wchar_t *wpath;
  306. wpath = PyUnicode_AsUnicode(path);
  307. if (wpath == NULL)
  308. return -2;
  309. err = _wstat(wpath, &wstatbuf);
  310. if (!err)
  311. statbuf->st_mode = wstatbuf.st_mode;
  312. return err;
  313. #else
  314. int ret;
  315. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  316. if (bytes == NULL)
  317. return -2;
  318. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  319. Py_DECREF(bytes);
  320. return ret;
  321. #endif
  322. }
  323. /* Open a file. Use _wfopen() on Windows, encode the path to the locale
  324. encoding and use fopen() otherwise. */
  325. FILE *
  326. _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  327. {
  328. #ifndef MS_WINDOWS
  329. FILE *f;
  330. char *cpath;
  331. char cmode[10];
  332. size_t r;
  333. r = wcstombs(cmode, mode, 10);
  334. if (r == (size_t)-1 || r >= 10) {
  335. errno = EINVAL;
  336. return NULL;
  337. }
  338. cpath = _Py_wchar2char(path, NULL);
  339. if (cpath == NULL)
  340. return NULL;
  341. f = fopen(cpath, cmode);
  342. PyMem_Free(cpath);
  343. return f;
  344. #else
  345. return _wfopen(path, mode);
  346. #endif
  347. }
  348. /* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
  349. call fopen() otherwise.
  350. Return the new file object on success, or NULL if the file cannot be open or
  351. (if PyErr_Occurred()) on unicode error */
  352. FILE*
  353. _Py_fopen(PyObject *path, const char *mode)
  354. {
  355. #ifdef MS_WINDOWS
  356. wchar_t *wpath;
  357. wchar_t wmode[10];
  358. int usize;
  359. if (!PyUnicode_Check(path)) {
  360. PyErr_Format(PyExc_TypeError,
  361. "str file path expected under Windows, got %R",
  362. Py_TYPE(path));
  363. return NULL;
  364. }
  365. wpath = PyUnicode_AsUnicode(path);
  366. if (wpath == NULL)
  367. return NULL;
  368. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  369. if (usize == 0)
  370. return NULL;
  371. return _wfopen(wpath, wmode);
  372. #else
  373. FILE *f;
  374. PyObject *bytes;
  375. if (!PyUnicode_FSConverter(path, &bytes))
  376. return NULL;
  377. f = fopen(PyBytes_AS_STRING(bytes), mode);
  378. Py_DECREF(bytes);
  379. return f;
  380. #endif
  381. }
  382. #ifdef HAVE_READLINK
  383. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  384. the result from the locale encoding. Return -1 on error. */
  385. int
  386. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  387. {
  388. char *cpath;
  389. char cbuf[PATH_MAX];
  390. wchar_t *wbuf;
  391. int res;
  392. size_t r1;
  393. cpath = _Py_wchar2char(path, NULL);
  394. if (cpath == NULL) {
  395. errno = EINVAL;
  396. return -1;
  397. }
  398. res = (int)readlink(cpath, cbuf, PATH_MAX);
  399. PyMem_Free(cpath);
  400. if (res == -1)
  401. return -1;
  402. if (res == PATH_MAX) {
  403. errno = EINVAL;
  404. return -1;
  405. }
  406. cbuf[res] = '\0'; /* buf will be null terminated */
  407. wbuf = _Py_char2wchar(cbuf, &r1);
  408. if (wbuf == NULL) {
  409. errno = EINVAL;
  410. return -1;
  411. }
  412. if (bufsiz <= r1) {
  413. PyMem_Free(wbuf);
  414. errno = EINVAL;
  415. return -1;
  416. }
  417. wcsncpy(buf, wbuf, bufsiz);
  418. PyMem_Free(wbuf);
  419. return (int)r1;
  420. }
  421. #endif
  422. #ifdef HAVE_REALPATH
  423. /* Return the canonicalized absolute pathname. Encode path to the locale
  424. encoding, decode the result from the locale encoding.
  425. Return NULL on error. */
  426. wchar_t*
  427. _Py_wrealpath(const wchar_t *path,
  428. wchar_t *resolved_path, size_t resolved_path_size)
  429. {
  430. char *cpath;
  431. char cresolved_path[PATH_MAX];
  432. wchar_t *wresolved_path;
  433. char *res;
  434. size_t r;
  435. cpath = _Py_wchar2char(path, NULL);
  436. if (cpath == NULL) {
  437. errno = EINVAL;
  438. return NULL;
  439. }
  440. res = realpath(cpath, cresolved_path);
  441. PyMem_Free(cpath);
  442. if (res == NULL)
  443. return NULL;
  444. wresolved_path = _Py_char2wchar(cresolved_path, &r);
  445. if (wresolved_path == NULL) {
  446. errno = EINVAL;
  447. return NULL;
  448. }
  449. if (resolved_path_size <= r) {
  450. PyMem_Free(wresolved_path);
  451. errno = EINVAL;
  452. return NULL;
  453. }
  454. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  455. PyMem_Free(wresolved_path);
  456. return resolved_path;
  457. }
  458. #endif
  459. /* Get the current directory. size is the buffer size in wide characters
  460. including the null character. Decode the path from the locale encoding.
  461. Return NULL on error. */
  462. wchar_t*
  463. _Py_wgetcwd(wchar_t *buf, size_t size)
  464. {
  465. #ifdef MS_WINDOWS
  466. return _wgetcwd(buf, size);
  467. #else
  468. char fname[PATH_MAX];
  469. wchar_t *wname;
  470. size_t len;
  471. if (getcwd(fname, PATH_MAX) == NULL)
  472. return NULL;
  473. wname = _Py_char2wchar(fname, &len);
  474. if (wname == NULL)
  475. return NULL;
  476. if (size <= len) {
  477. PyMem_Free(wname);
  478. return NULL;
  479. }
  480. wcsncpy(buf, wname, size);
  481. PyMem_Free(wname);
  482. return buf;
  483. #endif
  484. }
  485. #endif