You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

536 lines
14 KiB

  1. #include "Python.h"
  2. #include "osdefs.h"
  3. #ifdef MS_WINDOWS
  4. # include <windows.h>
  5. #endif
  6. #ifdef HAVE_LANGINFO_H
  7. #include <langinfo.h>
  8. #endif
  9. #ifdef __APPLE__
  10. extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
  11. #endif
  12. PyObject *
  13. _Py_device_encoding(int fd)
  14. {
  15. #if defined(MS_WINDOWS) || defined(MS_WIN64)
  16. UINT cp;
  17. #endif
  18. if (!_PyVerify_fd(fd) || !isatty(fd)) {
  19. Py_RETURN_NONE;
  20. }
  21. #if defined(MS_WINDOWS) || defined(MS_WIN64)
  22. if (fd == 0)
  23. cp = GetConsoleCP();
  24. else if (fd == 1 || fd == 2)
  25. cp = GetConsoleOutputCP();
  26. else
  27. cp = 0;
  28. /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
  29. has no console */
  30. if (cp != 0)
  31. return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
  32. #elif defined(CODESET)
  33. {
  34. char *codeset = nl_langinfo(CODESET);
  35. if (codeset != NULL && codeset[0] != 0)
  36. return PyUnicode_FromString(codeset);
  37. }
  38. #endif
  39. Py_RETURN_NONE;
  40. }
  41. #ifdef HAVE_STAT
  42. /* Decode a byte string from the locale encoding with the
  43. surrogateescape error handler (undecodable bytes are decoded as characters
  44. in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
  45. character, escape the bytes using the surrogateescape error handler instead
  46. of decoding them.
  47. Use _Py_wchar2char() to encode the character string back to a byte string.
  48. Return a pointer to a newly allocated wide character string (use
  49. PyMem_Free() to free the memory) and write the number of written wide
  50. characters excluding the null character into *size if size is not NULL, or
  51. NULL on error (decoding or memory allocation error). If size is not NULL,
  52. *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
  53. error.
  54. Conversion errors should never happen, unless there is a bug in the C
  55. library. */
  56. wchar_t*
  57. _Py_char2wchar(const char* arg, size_t *size)
  58. {
  59. #ifdef __APPLE__
  60. wchar_t *wstr;
  61. wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
  62. if (size != NULL) {
  63. if (wstr != NULL)
  64. *size = wcslen(wstr);
  65. else
  66. *size = (size_t)-1;
  67. }
  68. return wstr;
  69. #else
  70. wchar_t *res;
  71. #ifdef HAVE_BROKEN_MBSTOWCS
  72. /* Some platforms have a broken implementation of
  73. * mbstowcs which does not count the characters that
  74. * would result from conversion. Use an upper bound.
  75. */
  76. size_t argsize = strlen(arg);
  77. #else
  78. size_t argsize = mbstowcs(NULL, arg, 0);
  79. #endif
  80. size_t count;
  81. unsigned char *in;
  82. wchar_t *out;
  83. #ifdef HAVE_MBRTOWC
  84. mbstate_t mbs;
  85. #endif
  86. if (argsize != (size_t)-1) {
  87. res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
  88. if (!res)
  89. goto oom;
  90. count = mbstowcs(res, arg, argsize+1);
  91. if (count != (size_t)-1) {
  92. wchar_t *tmp;
  93. /* Only use the result if it contains no
  94. surrogate characters. */
  95. for (tmp = res; *tmp != 0 &&
  96. !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
  97. ;
  98. if (*tmp == 0) {
  99. if (size != NULL)
  100. *size = count;
  101. return res;
  102. }
  103. }
  104. PyMem_Free(res);
  105. }
  106. /* Conversion failed. Fall back to escaping with surrogateescape. */
  107. #ifdef HAVE_MBRTOWC
  108. /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
  109. /* Overallocate; as multi-byte characters are in the argument, the
  110. actual output could use less memory. */
  111. argsize = strlen(arg) + 1;
  112. res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
  113. if (!res)
  114. goto oom;
  115. in = (unsigned char*)arg;
  116. out = res;
  117. memset(&mbs, 0, sizeof mbs);
  118. while (argsize) {
  119. size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
  120. if (converted == 0)
  121. /* Reached end of string; null char stored. */
  122. break;
  123. if (converted == (size_t)-2) {
  124. /* Incomplete character. This should never happen,
  125. since we provide everything that we have -
  126. unless there is a bug in the C library, or I
  127. misunderstood how mbrtowc works. */
  128. PyMem_Free(res);
  129. if (size != NULL)
  130. *size = (size_t)-2;
  131. return NULL;
  132. }
  133. if (converted == (size_t)-1) {
  134. /* Conversion error. Escape as UTF-8b, and start over
  135. in the initial shift state. */
  136. *out++ = 0xdc00 + *in++;
  137. argsize--;
  138. memset(&mbs, 0, sizeof mbs);
  139. continue;
  140. }
  141. if (Py_UNICODE_IS_SURROGATE(*out)) {
  142. /* Surrogate character. Escape the original
  143. byte sequence with surrogateescape. */
  144. argsize -= converted;
  145. while (converted--)
  146. *out++ = 0xdc00 + *in++;
  147. continue;
  148. }
  149. /* successfully converted some bytes */
  150. in += converted;
  151. argsize -= converted;
  152. out++;
  153. }
  154. #else /* HAVE_MBRTOWC */
  155. /* Cannot use C locale for escaping; manually escape as if charset
  156. is ASCII (i.e. escape all bytes > 128. This will still roundtrip
  157. correctly in the locale's charset, which must be an ASCII superset. */
  158. res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
  159. if (!res)
  160. goto oom;
  161. in = (unsigned char*)arg;
  162. out = res;
  163. while(*in)
  164. if(*in < 128)
  165. *out++ = *in++;
  166. else
  167. *out++ = 0xdc00 + *in++;
  168. *out = 0;
  169. #endif /* HAVE_MBRTOWC */
  170. if (size != NULL)
  171. *size = out - res;
  172. return res;
  173. oom:
  174. if (size != NULL)
  175. *size = (size_t)-1;
  176. return NULL;
  177. #endif /* __APPLE__ */
  178. }
  179. /* Encode a (wide) character string to the locale encoding with the
  180. surrogateescape error handler (characters in range U+DC80..U+DCFF are
  181. converted to bytes 0x80..0xFF).
  182. This function is the reverse of _Py_char2wchar().
  183. Return a pointer to a newly allocated byte string (use PyMem_Free() to free
  184. the memory), or NULL on encoding or memory allocation error.
  185. If error_pos is not NULL: *error_pos is the index of the invalid character
  186. on encoding error, or (size_t)-1 otherwise. */
  187. char*
  188. _Py_wchar2char(const wchar_t *text, size_t *error_pos)
  189. {
  190. #ifdef __APPLE__
  191. Py_ssize_t len;
  192. PyObject *unicode, *bytes = NULL;
  193. char *cpath;
  194. unicode = PyUnicode_FromWideChar(text, wcslen(text));
  195. if (unicode == NULL)
  196. return NULL;
  197. bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
  198. Py_DECREF(unicode);
  199. if (bytes == NULL) {
  200. PyErr_Clear();
  201. if (error_pos != NULL)
  202. *error_pos = (size_t)-1;
  203. return NULL;
  204. }
  205. len = PyBytes_GET_SIZE(bytes);
  206. cpath = PyMem_Malloc(len+1);
  207. if (cpath == NULL) {
  208. PyErr_Clear();
  209. Py_DECREF(bytes);
  210. if (error_pos != NULL)
  211. *error_pos = (size_t)-1;
  212. return NULL;
  213. }
  214. memcpy(cpath, PyBytes_AsString(bytes), len + 1);
  215. Py_DECREF(bytes);
  216. return cpath;
  217. #else /* __APPLE__ */
  218. const size_t len = wcslen(text);
  219. char *result = NULL, *bytes = NULL;
  220. size_t i, size, converted;
  221. wchar_t c, buf[2];
  222. /* The function works in two steps:
  223. 1. compute the length of the output buffer in bytes (size)
  224. 2. outputs the bytes */
  225. size = 0;
  226. buf[1] = 0;
  227. while (1) {
  228. for (i=0; i < len; i++) {
  229. c = text[i];
  230. if (c >= 0xdc80 && c <= 0xdcff) {
  231. /* UTF-8b surrogate */
  232. if (bytes != NULL) {
  233. *bytes++ = c - 0xdc00;
  234. size--;
  235. }
  236. else
  237. size++;
  238. continue;
  239. }
  240. else {
  241. buf[0] = c;
  242. if (bytes != NULL)
  243. converted = wcstombs(bytes, buf, size);
  244. else
  245. converted = wcstombs(NULL, buf, 0);
  246. if (converted == (size_t)-1) {
  247. if (result != NULL)
  248. PyMem_Free(result);
  249. if (error_pos != NULL)
  250. *error_pos = i;
  251. return NULL;
  252. }
  253. if (bytes != NULL) {
  254. bytes += converted;
  255. size -= converted;
  256. }
  257. else
  258. size += converted;
  259. }
  260. }
  261. if (result != NULL) {
  262. *bytes = 0;
  263. break;
  264. }
  265. size += 1; /* nul byte at the end */
  266. result = PyMem_Malloc(size);
  267. if (result == NULL) {
  268. if (error_pos != NULL)
  269. *error_pos = (size_t)-1;
  270. return NULL;
  271. }
  272. bytes = result;
  273. }
  274. return result;
  275. #endif /* __APPLE__ */
  276. }
  277. /* In principle, this should use HAVE__WSTAT, and _wstat
  278. should be detected by autoconf. However, no current
  279. POSIX system provides that function, so testing for
  280. it is pointless.
  281. Not sure whether the MS_WINDOWS guards are necessary:
  282. perhaps for cygwin/mingw builds?
  283. */
  284. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  285. /* Get file status. Encode the path to the locale encoding. */
  286. int
  287. _Py_wstat(const wchar_t* path, struct stat *buf)
  288. {
  289. int err;
  290. char *fname;
  291. fname = _Py_wchar2char(path, NULL);
  292. if (fname == NULL) {
  293. errno = EINVAL;
  294. return -1;
  295. }
  296. err = stat(fname, buf);
  297. PyMem_Free(fname);
  298. return err;
  299. }
  300. #endif
  301. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  302. call stat() otherwise. Only fill st_mode attribute on Windows.
  303. Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
  304. raised. */
  305. int
  306. _Py_stat(PyObject *path, struct stat *statbuf)
  307. {
  308. #ifdef MS_WINDOWS
  309. int err;
  310. struct _stat wstatbuf;
  311. wchar_t *wpath;
  312. wpath = PyUnicode_AsUnicode(path);
  313. if (wpath == NULL)
  314. return -2;
  315. err = _wstat(wpath, &wstatbuf);
  316. if (!err)
  317. statbuf->st_mode = wstatbuf.st_mode;
  318. return err;
  319. #else
  320. int ret;
  321. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  322. if (bytes == NULL)
  323. return -2;
  324. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  325. Py_DECREF(bytes);
  326. return ret;
  327. #endif
  328. }
  329. /* Open a file. Use _wfopen() on Windows, encode the path to the locale
  330. encoding and use fopen() otherwise. */
  331. FILE *
  332. _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  333. {
  334. #ifndef MS_WINDOWS
  335. FILE *f;
  336. char *cpath;
  337. char cmode[10];
  338. size_t r;
  339. r = wcstombs(cmode, mode, 10);
  340. if (r == (size_t)-1 || r >= 10) {
  341. errno = EINVAL;
  342. return NULL;
  343. }
  344. cpath = _Py_wchar2char(path, NULL);
  345. if (cpath == NULL)
  346. return NULL;
  347. f = fopen(cpath, cmode);
  348. PyMem_Free(cpath);
  349. return f;
  350. #else
  351. return _wfopen(path, mode);
  352. #endif
  353. }
  354. /* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
  355. call fopen() otherwise.
  356. Return the new file object on success, or NULL if the file cannot be open or
  357. (if PyErr_Occurred()) on unicode error */
  358. FILE*
  359. _Py_fopen(PyObject *path, const char *mode)
  360. {
  361. #ifdef MS_WINDOWS
  362. wchar_t *wpath;
  363. wchar_t wmode[10];
  364. int usize;
  365. if (!PyUnicode_Check(path)) {
  366. PyErr_Format(PyExc_TypeError,
  367. "str file path expected under Windows, got %R",
  368. Py_TYPE(path));
  369. return NULL;
  370. }
  371. wpath = PyUnicode_AsUnicode(path);
  372. if (wpath == NULL)
  373. return NULL;
  374. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  375. if (usize == 0)
  376. return NULL;
  377. return _wfopen(wpath, wmode);
  378. #else
  379. FILE *f;
  380. PyObject *bytes;
  381. if (!PyUnicode_FSConverter(path, &bytes))
  382. return NULL;
  383. f = fopen(PyBytes_AS_STRING(bytes), mode);
  384. Py_DECREF(bytes);
  385. return f;
  386. #endif
  387. }
  388. #ifdef HAVE_READLINK
  389. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  390. the result from the locale encoding. Return -1 on error. */
  391. int
  392. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  393. {
  394. char *cpath;
  395. char cbuf[PATH_MAX];
  396. wchar_t *wbuf;
  397. int res;
  398. size_t r1;
  399. cpath = _Py_wchar2char(path, NULL);
  400. if (cpath == NULL) {
  401. errno = EINVAL;
  402. return -1;
  403. }
  404. res = (int)readlink(cpath, cbuf, PATH_MAX);
  405. PyMem_Free(cpath);
  406. if (res == -1)
  407. return -1;
  408. if (res == PATH_MAX) {
  409. errno = EINVAL;
  410. return -1;
  411. }
  412. cbuf[res] = '\0'; /* buf will be null terminated */
  413. wbuf = _Py_char2wchar(cbuf, &r1);
  414. if (wbuf == NULL) {
  415. errno = EINVAL;
  416. return -1;
  417. }
  418. if (bufsiz <= r1) {
  419. PyMem_Free(wbuf);
  420. errno = EINVAL;
  421. return -1;
  422. }
  423. wcsncpy(buf, wbuf, bufsiz);
  424. PyMem_Free(wbuf);
  425. return (int)r1;
  426. }
  427. #endif
  428. #ifdef HAVE_REALPATH
  429. /* Return the canonicalized absolute pathname. Encode path to the locale
  430. encoding, decode the result from the locale encoding.
  431. Return NULL on error. */
  432. wchar_t*
  433. _Py_wrealpath(const wchar_t *path,
  434. wchar_t *resolved_path, size_t resolved_path_size)
  435. {
  436. char *cpath;
  437. char cresolved_path[PATH_MAX];
  438. wchar_t *wresolved_path;
  439. char *res;
  440. size_t r;
  441. cpath = _Py_wchar2char(path, NULL);
  442. if (cpath == NULL) {
  443. errno = EINVAL;
  444. return NULL;
  445. }
  446. res = realpath(cpath, cresolved_path);
  447. PyMem_Free(cpath);
  448. if (res == NULL)
  449. return NULL;
  450. wresolved_path = _Py_char2wchar(cresolved_path, &r);
  451. if (wresolved_path == NULL) {
  452. errno = EINVAL;
  453. return NULL;
  454. }
  455. if (resolved_path_size <= r) {
  456. PyMem_Free(wresolved_path);
  457. errno = EINVAL;
  458. return NULL;
  459. }
  460. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  461. PyMem_Free(wresolved_path);
  462. return resolved_path;
  463. }
  464. #endif
  465. /* Get the current directory. size is the buffer size in wide characters
  466. including the null character. Decode the path from the locale encoding.
  467. Return NULL on error. */
  468. wchar_t*
  469. _Py_wgetcwd(wchar_t *buf, size_t size)
  470. {
  471. #ifdef MS_WINDOWS
  472. return _wgetcwd(buf, size);
  473. #else
  474. char fname[PATH_MAX];
  475. wchar_t *wname;
  476. size_t len;
  477. if (getcwd(fname, PATH_MAX) == NULL)
  478. return NULL;
  479. wname = _Py_char2wchar(fname, &len);
  480. if (wname == NULL)
  481. return NULL;
  482. if (size <= len) {
  483. PyMem_Free(wname);
  484. return NULL;
  485. }
  486. wcsncpy(buf, wname, size);
  487. PyMem_Free(wname);
  488. return buf;
  489. #endif
  490. }
  491. #endif