You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

431 lines
12 KiB

  1. #include "Python.h"
  2. #ifdef MS_WINDOWS
  3. # include <windows.h>
  4. #endif
  5. #ifdef HAVE_STAT
  /* Decode a byte string from the locale encoding with the surrogateescape
     error handler: undecodable bytes are decoded as characters in the range
     U+DC80..U+DCFF. If a byte sequence would decode to a surrogate character,
     the bytes are escaped with the surrogateescape error handler instead of
     being decoded.

     Use _Py_wchar2char() to encode the character string back to a byte
     string.

     Return a pointer to a newly allocated wide character string (use
     PyMem_Free() to free the memory). If size is not NULL, the number of wide
     characters written, excluding the null character, is stored into *size.
     Return NULL on error (conversion error, memory allocation error, or a
     request too large to be expressed as a byte count); *size is left
     untouched in that case.

     Conversion errors should never happen, unless there is a bug in the C
     library. */
  wchar_t*
  _Py_char2wchar(const char* arg, size_t *size)
  {
      /* Largest number of wchar_t elements whose total byte size still fits
         in a size_t. Anything larger would wrap around in the multiplication
         passed to PyMem_Malloc() and yield an undersized buffer. */
      const size_t max_wchars = (size_t)-1 / sizeof(wchar_t);
      wchar_t *res;
  #ifdef HAVE_BROKEN_MBSTOWCS
      /* Some platforms have a broken implementation of
       * mbstowcs which does not count the characters that
       * would result from conversion.  Use an upper bound.
       */
      size_t argsize = strlen(arg);
  #else
      size_t argsize = mbstowcs(NULL, arg, 0);
  #endif
      size_t count;
      unsigned char *in;
      wchar_t *out;
  #ifdef HAVE_MBRTOWC
      mbstate_t mbs;
  #endif

      if (argsize != (size_t)-1) {
          /* argsize+1 elements are needed (terminator included). */
          if (argsize >= max_wchars)
              goto oom;
          res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
          if (!res)
              goto oom;
          count = mbstowcs(res, arg, argsize+1);
          if (count != (size_t)-1) {
              wchar_t *tmp;
              /* Only use the result if it contains no
                 surrogate characters. */
              for (tmp = res; *tmp != 0 &&
                           (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
                  ;
              if (*tmp == 0) {
                  if (size != NULL)
                      *size = count;
                  return res;
              }
          }
          PyMem_Free(res);
      }
      /* Conversion failed. Fall back to escaping with surrogateescape. */
  #ifdef HAVE_MBRTOWC
      /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

      /* Overallocate; as multi-byte characters are in the argument, the
         actual output could use less memory. */
      argsize = strlen(arg) + 1;
      if (argsize > max_wchars)
          goto oom;
      res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
      if (!res)
          goto oom;
      in = (unsigned char*)arg;
      out = res;
      memset(&mbs, 0, sizeof mbs);
      while (argsize) {
          size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
          if (converted == 0)
              /* Reached end of string; null char stored. */
              break;
          if (converted == (size_t)-2) {
              /* Incomplete character. This should never happen,
                 since we provide everything that we have -
                 unless there is a bug in the C library, or I
                 misunderstood how mbrtowc works. */
              fprintf(stderr, "unexpected mbrtowc result -2\n");
              PyMem_Free(res);
              return NULL;
          }
          if (converted == (size_t)-1) {
              /* Conversion error. Escape as UTF-8b, and start over
                 in the initial shift state. */
              *out++ = 0xdc00 + *in++;
              argsize--;
              memset(&mbs, 0, sizeof mbs);
              continue;
          }
          if (*out >= 0xd800 && *out <= 0xdfff) {
              /* Surrogate character.  Escape the original
                 byte sequence with surrogateescape. */
              argsize -= converted;
              while (converted--)
                  *out++ = 0xdc00 + *in++;
              continue;
          }
          /* successfully converted some bytes */
          in += converted;
          argsize -= converted;
          out++;
      }
  #else
      /* Cannot use C locale for escaping; manually escape as if charset
         is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
         correctly in the locale's charset, which must be an ASCII superset. */
      argsize = strlen(arg) + 1;
      if (argsize > max_wchars)
          goto oom;
      res = PyMem_Malloc(argsize*sizeof(wchar_t));
      if (!res)
          goto oom;
      in = (unsigned char*)arg;
      out = res;
      while (*in) {
          if (*in < 128)
              *out++ = *in++;
          else
              *out++ = 0xdc00 + *in++;
      }
      *out = 0;
  #endif
      /* out points at the terminating null character, so the difference is
         the number of characters written, terminator excluded. */
      if (size != NULL)
          *size = out - res;
      return res;
  oom:
      fprintf(stderr, "out of memory\n");
      return NULL;
  }
  /* Encode a wide character string to the locale encoding, applying the
     surrogateescape error handler: each character in U+DC80..U+DCFF is
     emitted as the single byte 0x80..0xFF it stands for.

     This is the inverse of _Py_char2wchar().

     On success, return a newly allocated, null-terminated byte string; the
     caller releases it with PyMem_Free(). Return NULL if a character cannot
     be converted or if the allocation fails.

     When error_pos is not NULL, *error_pos receives the index of the
     offending character on a conversion error, and (size_t)-1 in every other
     case. */
  char*
  _Py_wchar2char(const wchar_t *text, size_t *error_pos)
  {
      const size_t nchars = wcslen(text);
      char *buffer = NULL;    /* start of the output; NULL while sizing */
      char *cursor = NULL;    /* next byte to write; NULL while sizing */
      size_t idx;
      size_t remaining = 0;   /* sizing pass: bytes needed so far;
                                 writing pass: bytes still available */
      size_t produced;
      wchar_t single[2];      /* one character plus its terminator */

      single[1] = 0;
      if (error_pos != NULL)
          *error_pos = (size_t)-1;

      /* First iteration measures the output, second iteration fills it. */
      for (;;) {
          const int writing = (cursor != NULL);

          for (idx = 0; idx < nchars; idx++) {
              const wchar_t ch = text[idx];

              if (ch >= 0xdc80 && ch <= 0xdcff) {
                  /* UTF-8b surrogate: always exactly one output byte. */
                  if (writing) {
                      *cursor++ = ch - 0xdc00;
                      remaining--;
                  }
                  else {
                      remaining++;
                  }
                  continue;
              }

              single[0] = ch;
              if (writing)
                  produced = wcstombs(cursor, single, remaining);
              else
                  produced = wcstombs(NULL, single, 0);

              if (produced == (size_t)-1) {
                  if (buffer != NULL)
                      PyMem_Free(buffer);
                  if (error_pos != NULL)
                      *error_pos = idx;
                  return NULL;
              }

              if (writing) {
                  cursor += produced;
                  remaining -= produced;
              }
              else {
                  remaining += produced;
              }
          }

          if (buffer != NULL) {
              /* Writing pass finished: terminate and stop. */
              *cursor = 0;
              break;
          }

          remaining += 1; /* nul byte at the end */
          buffer = PyMem_Malloc(remaining);
          if (buffer == NULL)
              return NULL;
          cursor = buffer;
      }
      return buffer;
  }
  194. /* In principle, this should use HAVE__WSTAT, and _wstat
  195. should be detected by autoconf. However, no current
  196. POSIX system provides that function, so testing for
  197. it is pointless.
  198. Not sure whether the MS_WINDOWS guards are necessary:
  199. perhaps for cygwin/mingw builds?
  200. */
  201. #if defined(HAVE_STAT) && !defined(MS_WINDOWS)
  /* stat() for a wide character path. The path is first encoded to the
     locale encoding with _Py_wchar2char().

     Return the result of stat(), or -1 with errno set to EINVAL when the
     path cannot be encoded. */
  int
  _Py_wstat(const wchar_t* path, struct stat *buf)
  {
      int status;
      char *encoded = _Py_wchar2char(path, NULL);

      if (encoded != NULL) {
          status = stat(encoded, buf);
          PyMem_Free(encoded);
          return status;
      }

      errno = EINVAL;
      return -1;
  }
  217. #endif
  218. /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
  219. call stat() otherwise. Only fill st_mode attribute on Windows.
  220. Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
  221. unicode error. */
  222. int
  223. _Py_stat(PyObject *path, struct stat *statbuf)
  224. {
  225. #ifdef MS_WINDOWS
  226. int err;
  227. struct _stat wstatbuf;
  228. err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
  229. if (!err)
  230. statbuf->st_mode = wstatbuf.st_mode;
  231. return err;
  232. #else
  233. int ret;
  234. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  235. if (bytes == NULL)
  236. return -1;
  237. ret = stat(PyBytes_AS_STRING(bytes), statbuf);
  238. Py_DECREF(bytes);
  239. return ret;
  240. #endif
  241. }
  /* Open a file given a wide character path and mode.

     On Windows this is a direct call to _wfopen(). Elsewhere, the mode is
     converted with wcstombs() and the path is encoded to the locale encoding
     with _Py_wchar2char() before calling fopen().

     Return the stream on success, or NULL on failure. When the mode or the
     path cannot be converted, errno is set to EINVAL, matching the other
     wide-path wrappers in this file; otherwise errno is whatever fopen()
     left behind. */
  FILE *
  _Py_wfopen(const wchar_t *path, const wchar_t *mode)
  {
  #ifndef MS_WINDOWS
      FILE *f;
      char *cpath;
      char cmode[10];
      size_t r;

      /* r == sizeof cmode means the buffer was filled without room for the
         terminating null byte, i.e. the mode string is too long. */
      r = wcstombs(cmode, mode, sizeof cmode);
      if (r == (size_t)-1 || r >= sizeof cmode) {
          errno = EINVAL;
          return NULL;
      }
      cpath = _Py_wchar2char(path, NULL);
      if (cpath == NULL) {
          /* Do not leave a stale errno for the caller to misreport. */
          errno = EINVAL;
          return NULL;
      }
      f = fopen(cpath, cmode);
      PyMem_Free(cpath);
      return f;
  #else
      return _wfopen(path, mode);
  #endif
  }
  267. /* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
  268. call fopen() otherwise.
  269. Return the new file object on success, or NULL if the file cannot be open or
  270. (if PyErr_Occurred()) on unicode error */
  271. FILE*
  272. _Py_fopen(PyObject *path, const char *mode)
  273. {
  274. #ifdef MS_WINDOWS
  275. wchar_t wmode[10];
  276. int usize;
  277. usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
  278. if (usize == 0)
  279. return NULL;
  280. return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
  281. #else
  282. FILE *f;
  283. PyObject *bytes = PyUnicode_EncodeFSDefault(path);
  284. if (bytes == NULL)
  285. return NULL;
  286. f = fopen(PyBytes_AS_STRING(bytes), mode);
  287. Py_DECREF(bytes);
  288. return f;
  289. #endif
  290. }
  291. #ifdef HAVE_READLINK
  292. /* Read value of symbolic link. Encode the path to the locale encoding, decode
  293. the result from the locale encoding. */
  294. int
  295. _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
  296. {
  297. char *cpath;
  298. char cbuf[PATH_MAX];
  299. wchar_t *wbuf;
  300. int res;
  301. size_t r1;
  302. cpath = _Py_wchar2char(path, NULL);
  303. if (cpath == NULL) {
  304. errno = EINVAL;
  305. return -1;
  306. }
  307. res = (int)readlink(cpath, cbuf, PATH_MAX);
  308. PyMem_Free(cpath);
  309. if (res == -1)
  310. return -1;
  311. if (res == PATH_MAX) {
  312. errno = EINVAL;
  313. return -1;
  314. }
  315. cbuf[res] = '\0'; /* buf will be null terminated */
  316. wbuf = _Py_char2wchar(cbuf, &r1);
  317. if (wbuf == NULL) {
  318. errno = EINVAL;
  319. return -1;
  320. }
  321. if (bufsiz <= r1) {
  322. PyMem_Free(wbuf);
  323. errno = EINVAL;
  324. return -1;
  325. }
  326. wcsncpy(buf, wbuf, bufsiz);
  327. PyMem_Free(wbuf);
  328. return (int)r1;
  329. }
  330. #endif
  331. #ifdef HAVE_REALPATH
  332. /* Return the canonicalized absolute pathname. Encode path to the locale
  333. encoding, decode the result from the locale encoding. */
  334. wchar_t*
  335. _Py_wrealpath(const wchar_t *path,
  336. wchar_t *resolved_path, size_t resolved_path_size)
  337. {
  338. char *cpath;
  339. char cresolved_path[PATH_MAX];
  340. wchar_t *wresolved_path;
  341. char *res;
  342. size_t r;
  343. cpath = _Py_wchar2char(path, NULL);
  344. if (cpath == NULL) {
  345. errno = EINVAL;
  346. return NULL;
  347. }
  348. res = realpath(cpath, cresolved_path);
  349. PyMem_Free(cpath);
  350. if (res == NULL)
  351. return NULL;
  352. wresolved_path = _Py_char2wchar(cresolved_path, &r);
  353. if (wresolved_path == NULL) {
  354. errno = EINVAL;
  355. return NULL;
  356. }
  357. if (resolved_path_size <= r) {
  358. PyMem_Free(wresolved_path);
  359. errno = EINVAL;
  360. return NULL;
  361. }
  362. wcsncpy(resolved_path, wresolved_path, resolved_path_size);
  363. PyMem_Free(wresolved_path);
  364. return resolved_path;
  365. }
  366. #endif
  367. /* Get the current directory. size is the buffer size in wide characters
  368. including the null character. Decode the path from the locale encoding. */
  369. wchar_t*
  370. _Py_wgetcwd(wchar_t *buf, size_t size)
  371. {
  372. #ifdef MS_WINDOWS
  373. return _wgetcwd(buf, size);
  374. #else
  375. char fname[PATH_MAX];
  376. wchar_t *wname;
  377. size_t len;
  378. if (getcwd(fname, PATH_MAX) == NULL)
  379. return NULL;
  380. wname = _Py_char2wchar(fname, &len);
  381. if (wname == NULL)
  382. return NULL;
  383. if (size <= len) {
  384. PyMem_Free(wname);
  385. return NULL;
  386. }
  387. wcsncpy(buf, wname, size);
  388. PyMem_Free(wname);
  389. return buf;
  390. #endif
  391. }
  392. #endif