You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

172 lines
5.2 KiB

  1. /* bytes to hex implementation */
  2. #include "Python.h"
  3. #include "pystrhex.h"
  4. static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
  5. const PyObject* sep, int bytes_per_sep_group,
  6. const int return_bytes)
  7. {
  8. assert(arglen >= 0);
  9. Py_UCS1 sep_char = 0;
  10. if (sep) {
  11. Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
  12. if (seplen < 0) {
  13. return NULL;
  14. }
  15. if (seplen != 1) {
  16. PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
  17. return NULL;
  18. }
  19. if (PyUnicode_Check(sep)) {
  20. if (PyUnicode_READY(sep))
  21. return NULL;
  22. if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
  23. PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
  24. return NULL;
  25. }
  26. sep_char = PyUnicode_READ_CHAR(sep, 0);
  27. }
  28. else if (PyBytes_Check(sep)) {
  29. sep_char = PyBytes_AS_STRING(sep)[0];
  30. }
  31. else {
  32. PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
  33. return NULL;
  34. }
  35. if (sep_char > 127 && !return_bytes) {
  36. PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
  37. return NULL;
  38. }
  39. }
  40. else {
  41. bytes_per_sep_group = 0;
  42. }
  43. unsigned int abs_bytes_per_sep = abs(bytes_per_sep_group);
  44. Py_ssize_t resultlen = 0;
  45. if (bytes_per_sep_group && arglen > 0) {
  46. /* How many sep characters we'll be inserting. */
  47. resultlen = (arglen - 1) / abs_bytes_per_sep;
  48. }
  49. /* Bounds checking for our Py_ssize_t indices. */
  50. if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
  51. return PyErr_NoMemory();
  52. }
  53. resultlen += arglen * 2;
  54. if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
  55. bytes_per_sep_group = 0;
  56. abs_bytes_per_sep = 0;
  57. }
  58. PyObject *retval;
  59. Py_UCS1 *retbuf;
  60. if (return_bytes) {
  61. /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
  62. retval = PyBytes_FromStringAndSize(NULL, resultlen);
  63. if (!retval) {
  64. return NULL;
  65. }
  66. retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
  67. }
  68. else {
  69. retval = PyUnicode_New(resultlen, 127);
  70. if (!retval) {
  71. return NULL;
  72. }
  73. retbuf = PyUnicode_1BYTE_DATA(retval);
  74. }
  75. /* Hexlify */
  76. Py_ssize_t i, j;
  77. unsigned char c;
  78. if (bytes_per_sep_group == 0) {
  79. for (i = j = 0; i < arglen; ++i) {
  80. assert((j + 1) < resultlen);
  81. c = argbuf[i];
  82. retbuf[j++] = Py_hexdigits[c >> 4];
  83. retbuf[j++] = Py_hexdigits[c & 0x0f];
  84. }
  85. assert(j == resultlen);
  86. }
  87. else {
  88. /* The number of complete chunk+sep periods */
  89. Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
  90. Py_ssize_t chunk;
  91. unsigned int k;
  92. if (bytes_per_sep_group < 0) {
  93. i = j = 0;
  94. for (chunk = 0; chunk < chunks; chunk++) {
  95. for (k = 0; k < abs_bytes_per_sep; k++) {
  96. c = argbuf[i++];
  97. retbuf[j++] = Py_hexdigits[c >> 4];
  98. retbuf[j++] = Py_hexdigits[c & 0x0f];
  99. }
  100. retbuf[j++] = sep_char;
  101. }
  102. while (i < arglen) {
  103. c = argbuf[i++];
  104. retbuf[j++] = Py_hexdigits[c >> 4];
  105. retbuf[j++] = Py_hexdigits[c & 0x0f];
  106. }
  107. assert(j == resultlen);
  108. }
  109. else {
  110. i = arglen - 1;
  111. j = resultlen - 1;
  112. for (chunk = 0; chunk < chunks; chunk++) {
  113. for (k = 0; k < abs_bytes_per_sep; k++) {
  114. c = argbuf[i--];
  115. retbuf[j--] = Py_hexdigits[c & 0x0f];
  116. retbuf[j--] = Py_hexdigits[c >> 4];
  117. }
  118. retbuf[j--] = sep_char;
  119. }
  120. while (i >= 0) {
  121. c = argbuf[i--];
  122. retbuf[j--] = Py_hexdigits[c & 0x0f];
  123. retbuf[j--] = Py_hexdigits[c >> 4];
  124. }
  125. assert(j == -1);
  126. }
  127. }
  128. #ifdef Py_DEBUG
  129. if (!return_bytes) {
  130. assert(_PyUnicode_CheckConsistency(retval, 1));
  131. }
  132. #endif
  133. return retval;
  134. }
  135. PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
  136. {
  137. return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
  138. }
  139. /* Same as above but returns a bytes() instead of str() to avoid the
  140. * need to decode the str() when bytes are needed. */
  141. PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
  142. {
  143. return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
  144. }
  145. /* These variants include support for a separator between every N bytes: */
  146. PyObject * _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group)
  147. {
  148. return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0);
  149. }
  150. /* Same as above but returns a bytes() instead of str() to avoid the
  151. * need to decode the str() when bytes are needed. */
  152. PyObject * _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group)
  153. {
  154. return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
  155. }