You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

487 lines
13 KiB

  1. #include "Python.h"
  2. #ifdef MS_WINDOWS
  3. # include <windows.h>
  4. #endif
  5. #ifdef HAVE_LANGINFO_H
  6. #include <langinfo.h>
  7. #endif
  8. PyObject *
  9. _Py_device_encoding(int fd)
  10. {
  11. #if defined(MS_WINDOWS) || defined(MS_WIN64)
  12. UINT cp;
  13. #endif
  14. if (!_PyVerify_fd(fd) || !isatty(fd)) {
  15. Py_RETURN_NONE;
  16. }
  17. #if defined(MS_WINDOWS) || defined(MS_WIN64)
  18. if (fd == 0)
  19. cp = GetConsoleCP();
  20. else if (fd == 1 || fd == 2)
  21. cp = GetConsoleOutputCP();
  22. else
  23. cp = 0;
  24. /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
  25. has no console */
  26. if (cp != 0)
  27. return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
  28. #elif defined(CODESET)
  29. {
  30. char *codeset = nl_langinfo(CODESET);
  31. if (codeset != NULL && codeset[0] != 0)
  32. return PyUnicode_FromString(codeset);
  33. }
  34. #endif
  35. Py_RETURN_NONE;
  36. }
  37. #ifdef HAVE_STAT
  38. /* Decode a byte string from the locale encoding with the
  39. surrogateescape error handler (undecodable bytes are decoded as characters
  40. in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
  41. character, escape the bytes using the surrogateescape error handler instead
  42. of decoding them.
  43. Use _Py_wchar2char() to encode the character string back to a byte string.
  44. Return a pointer to a newly allocated wide character string (use
  45. PyMem_Free() to free the memory) and write the number of written wide
  46. characters excluding the null character into *size if size is not NULL, or
  47. NULL on error (decoding or memory allocation error). If size is not NULL,
  48. *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
  49. error.
  50. Conversion errors should never happen, unless there is a bug in the C
  51. library. */
  52. wchar_t*
  53. _Py_char2wchar(const char* arg, size_t *size)
  54. {
  55. wchar_t *res;
  56. #ifdef HAVE_BROKEN_MBSTOWCS
  57. /* Some platforms have a broken implementation of
  58. * mbstowcs which does not count the characters that
  59. * would result from conversion. Use an upper bound.
  60. */
  61. size_t argsize = strlen(arg);
  62. #else
  63. size_t argsize = mbstowcs(NULL, arg, 0);
  64. #endif
  65. size_t count;
  66. unsigned char *in;
  67. wchar_t *out;
  68. #ifdef HAVE_MBRTOWC
  69. mbstate_t mbs;
  70. #endif
  71. if (argsize != (size_t)-1) {
  72. res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
  73. if (!res)
  74. goto oom;
  75. count = mbstowcs(res, arg, argsize+1);
  76. if (count != (size_t)-1) {
  77. wchar_t *tmp;
  78. /* Only use the result if it contains no
  79. surrogate characters. */
  80. for (tmp = res; *tmp != 0 &&
  81. (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
  82. ;
  83. if (*tmp == 0) {
  84. if (size != NULL)
  85. *size = count;
  86. return res;
  87. }
  88. }
  89. PyMem_Free(res);
  90. }
  91. /* Conversion failed. Fall back to escaping with surrogateescape. */
  92. #ifdef HAVE_MBRTOWC
  93. /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
  94. /* Overallocate; as multi-byte characters are in the argument, the
  95. actual output could use less memory. */
  96. argsize = strlen(arg) + 1;
  97. res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
  98. if (!res)
  99. goto oom;
  100. in = (unsigned char*)arg;
  101. out = res;
  102. memset(&mbs, 0, sizeof mbs);
  103. while (argsize) {
  104. size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
  105. if (converted == 0)
  106. /* Reached end of string; null char stored. */
  107. break;
  108. if (converted == (size_t)-2) {
  109. /* Incomplete character. This should never happen,
  110. since we provide everything that we have -
  111. unless there is a bug in the C library, or I
  112. misunderstood how mbrtowc works. */
  113. PyMem_Free(res);
  114. if (size != NULL)
  115. *size = (size_t)-2;
  116. return NULL;
  117. }
  118. if (converted == (size_t)-1) {
  119. /* Conversion error. Escape as UTF-8b, and start over
  120. in the initial shift state. */
  121. *out++ = 0xdc00 + *in++;
  122. argsize--;
  123. memset(&mbs, 0, sizeof mbs);
  124. continue;
  125. }
  126. if (*out >= 0xd800 && *out <= 0xdfff) {
  127. /* Surrogate character. Escape the original
  128. byte sequence with surrogateescape. */
  129. argsize -= converted;
  130. while (converted--)
  131. *out++ = 0xdc00 + *in++;
  132. continue;
  133. }
  134. /* successfully converted some bytes */
  135. in += converted;
  136. argsize -= converted;
  137. out++;
  138. }
  139. #else
  140. /* Cannot use C locale for escaping; manually escape as if charset
  141. is ASCII (i.e. escape all bytes > 128. This will still roundtrip
  142. correctly in the locale's charset, which must be an ASCII superset. */
  143. res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
  144. if (!res)
  145. goto oom;
  146. in = (unsigned char*)arg;
  147. out = res;
  148. while(*in)
  149. if(*in < 128)
  150. *out++ = *in++;
  151. else
  152. *out++ = 0xdc00 + *in++;
  153. *out = 0;
  154. #endif
  155. if (size != NULL)
  156. *size = out - res;
  157. return res;
  158. oom:
  159. if (size != NULL)
  160. *size = (size_t)-1;
  161. return NULL;
  162. }
  163. /* Encode a (wide) character string to the locale encoding with the
  164. surrogateescape error handler (characters in range U+DC80..U+DCFF are
  165. converted to bytes 0x80..0xFF).
  166. This function is the reverse of _Py_char2wchar().
  167. Return a pointer to a newly allocated byte string (use PyMem_Free() to free
  168. the memory), or NULL on encoding or memory allocation error.
  169. If error_pos is not NULL: *error_pos is the index of the invalid character
  170. on encoding error, or (size_t)-1 otherwise. */
  171. char*
  172. _Py_wchar2char(const wchar_t *text, size_t *error_pos)
  173. {
  174. const size_t len = wcslen(text);
  175. char *result = NULL, *bytes = NULL;
  176. size_t i, size, converted;
  177. wchar_t c, buf[2];
  178. if (error_pos != NULL)
  179. *error_pos = (size_t)-1;
  180. /* The function works in two steps:
  181. 1. compute the length of the output buffer in bytes (size)
  182. 2. outputs the bytes */
  183. size = 0;
  184. buf[1] = 0;
  185. while (1) {
  186. for (i=0; i < len; i++) {
  187. c = text[i];
  188. if (c >= 0xdc80 && c <= 0xdcff) {
  189. /* UTF-8b surrogate */
  190. if (bytes != NULL) {
  191. *bytes++ = c - 0xdc00;
  192. size--;
  193. }
  194. else
  195. size++;
  196. continue;
  197. }
  198. else {
  199. buf[0] = c;
  200. if (bytes != NULL)
  201. converted = wcstombs(bytes, buf, size);
  202. else
  203. converted = wcstombs(NULL, buf, 0);
  204. if (converted == (size_t)-1) {
  205. if (result != NULL)
  206. PyMem_Free(result);
  207. if (error_pos != NULL)
  208. *error_pos = i;
  209. return NULL;
  210. }
  211. if (bytes != NULL) {
  212. bytes += converted;
  213. size -= converted;
  214. }
  215. else
  216. size += converted;
  217. }
  218. }
  219. if (result != NULL) {
  220. *bytes = 0;
  221. break;
  222. }
  223. size += 1; /* nul byte at the end */
  224. result = PyMem_Malloc(size);
  225. if (result == NULL)
  226. return NULL;
  227. bytes = result;
  228. }
  229. return result;
  230. }
  231. /* In principle, this should use HAVE__WSTAT, and _wstat
  232. should be detected by autoconf. However, no current
  233. POSIX system provides that function, so testing for
  234. it is pointless.
  235. Not sure whether the MS_WINDOWS guards are necessary:
  236. perhaps for cygwin/mingw builds?
  237. */
  238. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  239. /* Get file status. Encode the path to the locale encoding. */
  240. int
  241. _Py_wstat(const wchar_t* path, struct stat *buf)
  242. {
  243. int err;
  244. char *fname;
  245. fname = _Py_wchar2char(path, NULL);
  246. if (fname == NULL) {
  247. errno = EINVAL;
  248. return -1;
  249. }
  250. err = stat(fname, buf);
  251. PyMem_Free(fname);
  252. return err;
  253. }
  254. #endif
  255. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  256. call stat() otherwise. Only fill st_mode attribute on Windows.
  257. Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
  258. raised. */
  259. int
  260. _Py_stat(PyObject *path, struct stat *statbuf)
  261. {
  262. #ifdef MS_WINDOWS
  263. int err;
  264. struct _stat wstatbuf;
  265. wchar_t *wpath;
  266. wpath = PyUnicode_AsUnicode(path);
  267. if (wpath == NULL)
  268. return -2;
  269. err = _wstat(wpath, &wstatbuf);
  270. if (!err)
  271. statbuf->st_mode = wstatbuf.st_mode;
  272. return err;
  273. #else
  274. int ret;
  275. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  276. if (bytes == NULL)
  277. return -2;
  278. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  279. Py_DECREF(bytes);
  280. return ret;
  281. #endif
  282. }
  283. /* Open a file. Use _wfopen() on Windows, encode the path to the locale
  284. encoding and use fopen() otherwise. */
  285. FILE *
  286. _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  287. {
  288. #ifndef MS_WINDOWS
  289. FILE *f;
  290. char *cpath;
  291. char cmode[10];
  292. size_t r;
  293. r = wcstombs(cmode, mode, 10);
  294. if (r == (size_t)-1 || r >= 10) {
  295. errno = EINVAL;
  296. return NULL;
  297. }
  298. cpath = _Py_wchar2char(path, NULL);
  299. if (cpath == NULL)
  300. return NULL;
  301. f = fopen(cpath, cmode);
  302. PyMem_Free(cpath);
  303. return f;
  304. #else
  305. return _wfopen(path, mode);
  306. #endif
  307. }
  308. /* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
  309. call fopen() otherwise.
  310. Return the new file object on success, or NULL if the file cannot be open or
  311. (if PyErr_Occurred()) on unicode error */
  312. FILE*
  313. _Py_fopen(PyObject *path, const char *mode)
  314. {
  315. #ifdef MS_WINDOWS
  316. wchar_t *wpath;
  317. wchar_t wmode[10];
  318. int usize;
  319. if (!PyUnicode_Check(path)) {
  320. PyErr_Format(PyExc_TypeError,
  321. "str file path expected under Windows, got %R",
  322. Py_TYPE(path));
  323. return NULL;
  324. }
  325. wpath = PyUnicode_AsUnicode(path);
  326. if (wpath == NULL)
  327. return NULL;
  328. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  329. if (usize == 0)
  330. return NULL;
  331. return _wfopen(wpath, wmode);
  332. #else
  333. FILE *f;
  334. PyObject *bytes;
  335. if (!PyUnicode_FSConverter(path, &bytes))
  336. return NULL;
  337. f = fopen(PyBytes_AS_STRING(bytes), mode);
  338. Py_DECREF(bytes);
  339. return f;
  340. #endif
  341. }
  342. #ifdef HAVE_READLINK
  343. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  344. the result from the locale encoding. Return -1 on error. */
  345. int
  346. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  347. {
  348. char *cpath;
  349. char cbuf[PATH_MAX];
  350. wchar_t *wbuf;
  351. int res;
  352. size_t r1;
  353. cpath = _Py_wchar2char(path, NULL);
  354. if (cpath == NULL) {
  355. errno = EINVAL;
  356. return -1;
  357. }
  358. res = (int)readlink(cpath, cbuf, PATH_MAX);
  359. PyMem_Free(cpath);
  360. if (res == -1)
  361. return -1;
  362. if (res == PATH_MAX) {
  363. errno = EINVAL;
  364. return -1;
  365. }
  366. cbuf[res] = '\0'; /* buf will be null terminated */
  367. wbuf = _Py_char2wchar(cbuf, &r1);
  368. if (wbuf == NULL) {
  369. errno = EINVAL;
  370. return -1;
  371. }
  372. if (bufsiz <= r1) {
  373. PyMem_Free(wbuf);
  374. errno = EINVAL;
  375. return -1;
  376. }
  377. wcsncpy(buf, wbuf, bufsiz);
  378. PyMem_Free(wbuf);
  379. return (int)r1;
  380. }
  381. #endif
  382. #ifdef HAVE_REALPATH
  383. /* Return the canonicalized absolute pathname. Encode path to the locale
  384. encoding, decode the result from the locale encoding.
  385. Return NULL on error. */
  386. wchar_t*
  387. _Py_wrealpath(const wchar_t *path,
  388. wchar_t *resolved_path, size_t resolved_path_size)
  389. {
  390. char *cpath;
  391. char cresolved_path[PATH_MAX];
  392. wchar_t *wresolved_path;
  393. char *res;
  394. size_t r;
  395. cpath = _Py_wchar2char(path, NULL);
  396. if (cpath == NULL) {
  397. errno = EINVAL;
  398. return NULL;
  399. }
  400. res = realpath(cpath, cresolved_path);
  401. PyMem_Free(cpath);
  402. if (res == NULL)
  403. return NULL;
  404. wresolved_path = _Py_char2wchar(cresolved_path, &r);
  405. if (wresolved_path == NULL) {
  406. errno = EINVAL;
  407. return NULL;
  408. }
  409. if (resolved_path_size <= r) {
  410. PyMem_Free(wresolved_path);
  411. errno = EINVAL;
  412. return NULL;
  413. }
  414. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  415. PyMem_Free(wresolved_path);
  416. return resolved_path;
  417. }
  418. #endif
  419. /* Get the current directory. size is the buffer size in wide characters
  420. including the null character. Decode the path from the locale encoding.
  421. Return NULL on error. */
  422. wchar_t*
  423. _Py_wgetcwd(wchar_t *buf, size_t size)
  424. {
  425. #ifdef MS_WINDOWS
  426. return _wgetcwd(buf, size);
  427. #else
  428. char fname[PATH_MAX];
  429. wchar_t *wname;
  430. size_t len;
  431. if (getcwd(fname, PATH_MAX) == NULL)
  432. return NULL;
  433. wname = _Py_char2wchar(fname, &len);
  434. if (wname == NULL)
  435. return NULL;
  436. if (size <= len) {
  437. PyMem_Free(wname);
  438. return NULL;
  439. }
  440. wcsncpy(buf, wname, size);
  441. PyMem_Free(wname);
  442. return buf;
  443. #endif
  444. }
  445. #endif