You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1625 lines
45 KiB

25 years ago
24 years ago
27 years ago
27 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
27 years ago
23 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
27 years ago
25 years ago
27 years ago
27 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
27 years ago
25 years ago
25 years ago
25 years ago
25 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 4 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2003 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.0 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_0.txt. |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Stig Sther Bakken <ssb@php.net> |
  16. | Thies C. Arntzen <thies@thieso.net> |
  17. | Sterling Hughes <sterling@php.net> |
  18. +----------------------------------------------------------------------+
  19. */
  20. /* $Id$ */
  21. #define IS_EXT_MODULE
  22. #ifdef HAVE_CONFIG_H
  23. #include "config.h"
  24. #endif
  25. #include "php.h"
  26. #define PHP_XML_INTERNAL
  27. #include "zend_variables.h"
  28. #include "ext/standard/php_string.h"
  29. #include "ext/standard/info.h"
  30. #if HAVE_XML
  31. #include "php_xml.h"
  32. # include "ext/standard/head.h"
  33. #ifdef LIBXML_EXPAT_COMPAT
  34. #include "ext/libxml/php_libxml.h"
  35. #endif
  36. /* Short-term TODO list:
  37. * - Implement XML_ExternalEntityParserCreate()
  38. * - XML_SetCommentHandler
  39. * - XML_SetCdataSectionHandler
  40. * - XML_SetParamEntityParsing
  41. */
  42. /* Long-term TODO list:
  43. * - Fix the expat library so you can install your own memory manager
  44. * functions
  45. */
  46. /* Known bugs:
  47. * - Weird things happen with <![CDATA[]]> sections.
  48. */
/* Extension globals. Thread-safe (ZTS) builds only keep an id here, which
 * MINIT fills in through ts_allocate_id(); other builds define the
 * php_xml_globals structure directly. */
#ifdef ZTS
int xml_globals_id;
#else
PHP_XML_API php_xml_globals xml_globals;
#endif
/* {{{ dynamically loadable module stuff */
/* Only compiled when the extension is built with COMPILE_DL_XML defined. */
#ifdef COMPILE_DL_XML
ZEND_GET_MODULE(xml)
# ifdef PHP_WIN32
# include "zend_arg_defs.c"
# endif
#endif /* COMPILE_DL_XML */
  61. /* }}} */
  62. /* {{{ function prototypes */
  63. PHP_MINIT_FUNCTION(xml);
  64. PHP_RINIT_FUNCTION(xml);
  65. PHP_MSHUTDOWN_FUNCTION(xml);
  66. PHP_RSHUTDOWN_FUNCTION(xml);
  67. PHP_MINFO_FUNCTION(xml);
  68. static void xml_parser_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC);
  69. static void xml_set_handler(zval **, zval **);
  70. inline static unsigned short xml_encode_iso_8859_1(unsigned char);
  71. inline static char xml_decode_iso_8859_1(unsigned short);
  72. inline static unsigned short xml_encode_us_ascii(unsigned char);
  73. inline static char xml_decode_us_ascii(unsigned short);
  74. static XML_Char *xml_utf8_encode(const char *, int, int *, const XML_Char *);
  75. static zval *xml_call_handler(xml_parser *, zval *, zend_function *, int, zval **);
  76. static zval *_xml_xmlchar_zval(const XML_Char *, int, const XML_Char *);
  77. static int _xml_xmlcharlen(const XML_Char *);
  78. static void _xml_add_to_info(xml_parser *parser,char *name);
  79. inline static char *_xml_decode_tag(xml_parser *parser, const char *tag);
  80. void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
  81. void _xml_endElementHandler(void *, const XML_Char *);
  82. void _xml_characterDataHandler(void *, const XML_Char *, int);
  83. void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
  84. void _xml_defaultHandler(void *, const XML_Char *, int);
  85. void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
  86. void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
  87. int _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
  88. void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
  89. void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
  90. /* }}} */
  91. /* {{{ extension definition structures */
  92. static
  93. ZEND_BEGIN_ARG_INFO(third_and_fourth_args_force_ref, 0)
  94. ZEND_ARG_PASS_INFO(0)
  95. ZEND_ARG_PASS_INFO(0)
  96. ZEND_ARG_PASS_INFO(1)
  97. ZEND_ARG_PASS_INFO(1)
  98. ZEND_END_ARG_INFO();
  99. function_entry xml_functions[] = {
  100. PHP_FE(xml_parser_create, NULL)
  101. PHP_FE(xml_parser_create_ns, NULL)
  102. PHP_FE(xml_set_object, second_arg_force_ref)
  103. PHP_FE(xml_set_element_handler, NULL)
  104. PHP_FE(xml_set_character_data_handler, NULL)
  105. PHP_FE(xml_set_processing_instruction_handler, NULL)
  106. PHP_FE(xml_set_default_handler, NULL)
  107. PHP_FE(xml_set_unparsed_entity_decl_handler, NULL)
  108. PHP_FE(xml_set_notation_decl_handler, NULL)
  109. PHP_FE(xml_set_external_entity_ref_handler, NULL)
  110. PHP_FE(xml_set_start_namespace_decl_handler, NULL)
  111. PHP_FE(xml_set_end_namespace_decl_handler, NULL)
  112. PHP_FE(xml_parse, NULL)
  113. PHP_FE(xml_parse_into_struct, third_and_fourth_args_force_ref)
  114. PHP_FE(xml_get_error_code, NULL)
  115. PHP_FE(xml_error_string, NULL)
  116. PHP_FE(xml_get_current_line_number, NULL)
  117. PHP_FE(xml_get_current_column_number, NULL)
  118. PHP_FE(xml_get_current_byte_index, NULL)
  119. PHP_FE(xml_parser_free, NULL)
  120. PHP_FE(xml_parser_set_option, NULL)
  121. PHP_FE(xml_parser_get_option, NULL)
  122. PHP_FE(utf8_encode, NULL)
  123. PHP_FE(utf8_decode, NULL)
  124. {NULL, NULL, NULL}
  125. };
  126. zend_module_entry xml_module_entry = {
  127. STANDARD_MODULE_HEADER,
  128. "xml", /* extension name */
  129. xml_functions, /* extension function list */
  130. PHP_MINIT(xml), /* extension-wide startup function */
  131. PHP_MSHUTDOWN(xml), /* extension-wide shutdown function */
  132. PHP_RINIT(xml), /* per-request startup function */
  133. PHP_RSHUTDOWN(xml), /* per-request shutdown function */
  134. PHP_MINFO(xml), /* information function */
  135. NO_VERSION_YET,
  136. STANDARD_MODULE_PROPERTIES
  137. };
  138. /* All the encoding functions are set to NULL right now, since all
  139. * the encoding is currently done internally by expat/xmltok.
  140. */
  141. xml_encoding xml_encodings[] = {
  142. { "ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
  143. { "US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
  144. { "UTF-8", NULL, NULL },
  145. { NULL, NULL, NULL }
  146. };
  147. static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
  148. /* True globals, no need for thread safety */
  149. static int le_xml_parser;
  150. /* }}} */
  151. /* {{{ startup, shutdown and info functions */
#ifdef ZTS
/* Globals constructor handed to ts_allocate_id() in MINIT (ZTS builds only).
 * Sets the default encoding to "ISO-8859-1".
 * NOTE(review): the assignment goes through the XML() accessor rather than
 * the xml_globals_p argument - confirm both refer to the same structure. */
static void php_xml_init_globals(php_xml_globals *xml_globals_p TSRMLS_DC)
{
	XML(default_encoding) = "ISO-8859-1";
}
#endif
/* Allocation hook stored in php_xml_mem_hdlrs.malloc_fcn: forwards to emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	return emalloc(sz);
}
/* Reallocation hook stored in php_xml_mem_hdlrs.realloc_fcn: forwards to erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	return erealloc(ptr, sz);
}
/* Release hook stored in php_xml_mem_hdlrs.free_fcn: forwards to efree(). */
static void php_xml_free_wrapper(void *ptr)
{
	efree(ptr);
}
  170. PHP_MINIT_FUNCTION(xml)
  171. {
  172. le_xml_parser = zend_register_list_destructors_ex(xml_parser_dtor, NULL, "xml", module_number);
  173. #ifdef ZTS
  174. ts_allocate_id(&xml_globals_id, sizeof(php_xml_globals), (ts_allocate_ctor) php_xml_init_globals, NULL);
  175. #else
  176. XML(default_encoding) = "ISO-8859-1";
  177. #endif
  178. REGISTER_LONG_CONSTANT("XML_ERROR_NONE", XML_ERROR_NONE, CONST_CS|CONST_PERSISTENT);
  179. REGISTER_LONG_CONSTANT("XML_ERROR_NO_MEMORY", XML_ERROR_NO_MEMORY, CONST_CS|CONST_PERSISTENT);
  180. REGISTER_LONG_CONSTANT("XML_ERROR_SYNTAX", XML_ERROR_SYNTAX, CONST_CS|CONST_PERSISTENT);
  181. REGISTER_LONG_CONSTANT("XML_ERROR_NO_ELEMENTS", XML_ERROR_NO_ELEMENTS, CONST_CS|CONST_PERSISTENT);
  182. REGISTER_LONG_CONSTANT("XML_ERROR_INVALID_TOKEN", XML_ERROR_INVALID_TOKEN, CONST_CS|CONST_PERSISTENT);
  183. REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_TOKEN", XML_ERROR_UNCLOSED_TOKEN, CONST_CS|CONST_PERSISTENT);
  184. REGISTER_LONG_CONSTANT("XML_ERROR_PARTIAL_CHAR", XML_ERROR_PARTIAL_CHAR, CONST_CS|CONST_PERSISTENT);
  185. REGISTER_LONG_CONSTANT("XML_ERROR_TAG_MISMATCH", XML_ERROR_TAG_MISMATCH, CONST_CS|CONST_PERSISTENT);
  186. REGISTER_LONG_CONSTANT("XML_ERROR_DUPLICATE_ATTRIBUTE", XML_ERROR_DUPLICATE_ATTRIBUTE, CONST_CS|CONST_PERSISTENT);
  187. REGISTER_LONG_CONSTANT("XML_ERROR_JUNK_AFTER_DOC_ELEMENT", XML_ERROR_JUNK_AFTER_DOC_ELEMENT, CONST_CS|CONST_PERSISTENT);
  188. REGISTER_LONG_CONSTANT("XML_ERROR_PARAM_ENTITY_REF", XML_ERROR_PARAM_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  189. REGISTER_LONG_CONSTANT("XML_ERROR_UNDEFINED_ENTITY", XML_ERROR_UNDEFINED_ENTITY, CONST_CS|CONST_PERSISTENT);
  190. REGISTER_LONG_CONSTANT("XML_ERROR_RECURSIVE_ENTITY_REF", XML_ERROR_RECURSIVE_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  191. REGISTER_LONG_CONSTANT("XML_ERROR_ASYNC_ENTITY", XML_ERROR_ASYNC_ENTITY, CONST_CS|CONST_PERSISTENT);
  192. REGISTER_LONG_CONSTANT("XML_ERROR_BAD_CHAR_REF", XML_ERROR_BAD_CHAR_REF, CONST_CS|CONST_PERSISTENT);
  193. REGISTER_LONG_CONSTANT("XML_ERROR_BINARY_ENTITY_REF", XML_ERROR_BINARY_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  194. REGISTER_LONG_CONSTANT("XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  195. REGISTER_LONG_CONSTANT("XML_ERROR_MISPLACED_XML_PI", XML_ERROR_MISPLACED_XML_PI, CONST_CS|CONST_PERSISTENT);
  196. REGISTER_LONG_CONSTANT("XML_ERROR_UNKNOWN_ENCODING", XML_ERROR_UNKNOWN_ENCODING, CONST_CS|CONST_PERSISTENT);
  197. REGISTER_LONG_CONSTANT("XML_ERROR_INCORRECT_ENCODING", XML_ERROR_INCORRECT_ENCODING, CONST_CS|CONST_PERSISTENT);
  198. REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_CDATA_SECTION", XML_ERROR_UNCLOSED_CDATA_SECTION, CONST_CS|CONST_PERSISTENT);
  199. REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
  200. REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
  201. REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
  202. REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_CS|CONST_PERSISTENT);
  203. REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_CS|CONST_PERSISTENT);
  204. /* this object should not be pre-initialised at compile time,
  205. as the order of members may vary */
  206. php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
  207. php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
  208. php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
  209. #ifdef LIBXML_EXPAT_COMPAT
  210. REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "libxml", CONST_CS|CONST_PERSISTENT);
  211. #else
  212. REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "expat", CONST_CS|CONST_PERSISTENT);
  213. #endif
  214. #ifdef LIBXML_EXPAT_COMPAT
  215. php_libxml_initialize();
  216. #endif
  217. return SUCCESS;
  218. }
/* Per-request startup: nothing to set up, always reports SUCCESS. */
PHP_RINIT_FUNCTION(xml)
{
	return SUCCESS;
}
/* Module shutdown: calls php_libxml_shutdown() in libxml-compat builds
 * (the counterpart of php_libxml_initialize() in MINIT); always reports
 * SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(xml)
{
#ifdef LIBXML_EXPAT_COMPAT
	php_libxml_shutdown();
#endif
	return SUCCESS;
}
/* Per-request shutdown: nothing to clean up, always reports SUCCESS. */
PHP_RSHUTDOWN_FUNCTION(xml)
{
	return SUCCESS;
}
/* Module information function: prints a two-column table stating that XML
 * and XML namespace support are active, followed by the version of the
 * underlying library (libxml2 in compat builds, expat otherwise). */
PHP_MINFO_FUNCTION(xml)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
	php_info_print_table_row(2, "libxml2 Version", LIBXML_DOTTED_VERSION);
#else
	php_info_print_table_row(2, "EXPAT Version", XML_ExpatVersion());
#endif
	php_info_print_table_end();
}
  246. /* }}} */
  247. /* {{{ extension-internal functions */
  248. static zval *_xml_resource_zval(long value)
  249. {
  250. zval *ret;
  251. TSRMLS_FETCH();
  252. MAKE_STD_ZVAL(ret);
  253. Z_TYPE_P(ret) = IS_RESOURCE;
  254. Z_LVAL_P(ret) = value;
  255. zend_list_addref(value);
  256. return ret;
  257. }
  258. static zval *_xml_string_zval(const char *str)
  259. {
  260. zval *ret;
  261. int len = strlen(str);
  262. MAKE_STD_ZVAL(ret);
  263. Z_TYPE_P(ret) = IS_STRING;
  264. Z_STRLEN_P(ret) = len;
  265. Z_STRVAL_P(ret) = estrndup(str, len);
  266. return ret;
  267. }
  268. static zval *_xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding)
  269. {
  270. zval *ret;
  271. MAKE_STD_ZVAL(ret);
  272. if (s == NULL) {
  273. ZVAL_FALSE(ret);
  274. return ret;
  275. }
  276. if (len == 0) {
  277. len = _xml_xmlcharlen(s);
  278. }
  279. Z_TYPE_P(ret) = IS_STRING;
  280. Z_STRVAL_P(ret) = xml_utf8_decode(s, len, &Z_STRLEN_P(ret), encoding);
  281. return ret;
  282. }
  283. /* }}} */
  284. /* {{{ xml_parser_dtor() */
  285. static void xml_parser_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC)
  286. {
  287. xml_parser *parser = (xml_parser *)rsrc->ptr;
  288. if (parser->parser) {
  289. XML_ParserFree(parser->parser);
  290. }
  291. if (parser->ltags) {
  292. int inx;
  293. for (inx = 0; inx < parser->level; inx++)
  294. efree(parser->ltags[ inx ]);
  295. efree(parser->ltags);
  296. }
  297. if (parser->startElementHandler) {
  298. zval_ptr_dtor(&parser->startElementHandler);
  299. }
  300. if (parser->endElementHandler) {
  301. zval_ptr_dtor(&parser->endElementHandler);
  302. }
  303. if (parser->characterDataHandler) {
  304. zval_ptr_dtor(&parser->characterDataHandler);
  305. }
  306. if (parser->processingInstructionHandler) {
  307. zval_ptr_dtor(&parser->processingInstructionHandler);
  308. }
  309. if (parser->defaultHandler) {
  310. zval_ptr_dtor(&parser->defaultHandler);
  311. }
  312. if (parser->unparsedEntityDeclHandler) {
  313. zval_ptr_dtor(&parser->unparsedEntityDeclHandler);
  314. }
  315. if (parser->notationDeclHandler) {
  316. zval_ptr_dtor(&parser->notationDeclHandler);
  317. }
  318. if (parser->externalEntityRefHandler) {
  319. zval_ptr_dtor(&parser->externalEntityRefHandler);
  320. }
  321. if (parser->unknownEncodingHandler) {
  322. zval_ptr_dtor(&parser->unknownEncodingHandler);
  323. }
  324. if (parser->startNamespaceDeclHandler) {
  325. zval_ptr_dtor(&parser->startNamespaceDeclHandler);
  326. }
  327. if (parser->endNamespaceDeclHandler) {
  328. zval_ptr_dtor(&parser->endNamespaceDeclHandler);
  329. }
  330. if (parser->baseURI) {
  331. efree(parser->baseURI);
  332. }
  333. efree(parser);
  334. }
  335. /* }}} */
  336. /* {{{ xml_set_handler() */
  337. static void xml_set_handler(zval **handler, zval **data)
  338. {
  339. /* If we have already a handler, release it */
  340. if (*handler) {
  341. zval_ptr_dtor(handler);
  342. }
  343. /* IS_ARRAY might indicate that we're using array($obj, 'method') syntax */
  344. if (Z_TYPE_PP(data) != IS_ARRAY) {
  345. convert_to_string_ex(data);
  346. }
  347. zval_add_ref(data);
  348. *handler = *data;
  349. }
  350. /* }}} */
  351. /* {{{ xml_call_handler() */
  352. static zval *xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval **argv)
  353. {
  354. TSRMLS_FETCH();
  355. if (parser && handler) {
  356. zval ***args;
  357. zval *retval;
  358. int i;
  359. int result;
  360. zend_fcall_info fci;
  361. args = safe_emalloc(sizeof(zval **), argc, 0);
  362. for (i = 0; i < argc; i++) {
  363. args[i] = &argv[i];
  364. }
  365. fci.size = sizeof(fci);
  366. fci.function_table = EG(function_table);
  367. fci.function_name = handler;
  368. fci.symbol_table = NULL;
  369. fci.object_pp = &parser->object;
  370. fci.retval_ptr_ptr = &retval;
  371. fci.param_count = argc;
  372. fci.params = args;
  373. fci.no_separation = 0;
  374. /*fci.function_handler_cache = &function_ptr;*/
  375. result = zend_call_function(&fci, NULL TSRMLS_CC);
  376. if (result == FAILURE) {
  377. zval **method;
  378. zval **obj;
  379. if (Z_TYPE_P(handler) == IS_STRING) {
  380. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %s()", Z_STRVAL_P(handler));
  381. } else if (zend_hash_index_find(Z_ARRVAL_P(handler), 0, (void **) &obj) == SUCCESS &&
  382. zend_hash_index_find(Z_ARRVAL_P(handler), 1, (void **) &method) == SUCCESS &&
  383. Z_TYPE_PP(obj) == IS_OBJECT &&
  384. Z_TYPE_PP(method) == IS_STRING) {
  385. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %s::%s()", Z_OBJCE_PP(obj)->name, Z_STRVAL_PP(method));
  386. } else
  387. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler");
  388. }
  389. for (i = 0; i < argc; i++) {
  390. zval_ptr_dtor(args[i]);
  391. }
  392. efree(args);
  393. if (result == FAILURE) {
  394. return NULL;
  395. } else {
  396. return retval;
  397. }
  398. }
  399. return NULL;
  400. }
  401. /* }}} */
  402. /* {{{ xml_encode_iso_8859_1() */
  403. inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
  404. {
  405. return (unsigned short)c;
  406. }
  407. /* }}} */
  408. /* {{{ xml_decode_iso_8859_1() */
  409. inline static char xml_decode_iso_8859_1(unsigned short c)
  410. {
  411. return (char)(c > 0xff ? '?' : c);
  412. }
  413. /* }}} */
  414. /* {{{ xml_encode_us_ascii() */
  415. inline static unsigned short xml_encode_us_ascii(unsigned char c)
  416. {
  417. return (unsigned short)c;
  418. }
  419. /* }}} */
  420. /* {{{ xml_decode_us_ascii() */
  421. inline static char xml_decode_us_ascii(unsigned short c)
  422. {
  423. return (char)(c > 0x7f ? '?' : c);
  424. }
  425. /* }}} */
  426. /* {{{ xml_get_encoding() */
  427. static xml_encoding *xml_get_encoding(const XML_Char *name)
  428. {
  429. xml_encoding *enc = &xml_encodings[0];
  430. while (enc && enc->name) {
  431. if (strcasecmp(name, enc->name) == 0) {
  432. return enc;
  433. }
  434. enc++;
  435. }
  436. return NULL;
  437. }
  438. /* }}} */
  439. /* {{{ xml_utf8_encode */
  440. static XML_Char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char *encoding)
  441. {
  442. int pos = len;
  443. char *newbuf;
  444. unsigned short c;
  445. unsigned short (*encoder)(unsigned char) = NULL;
  446. xml_encoding *enc = xml_get_encoding(encoding);
  447. *newlen = 0;
  448. if (enc) {
  449. encoder = enc->encoding_function;
  450. } else {
  451. /* If the target encoding was unknown, fail */
  452. return NULL;
  453. }
  454. if (encoder == NULL) {
  455. /* If no encoder function was specified, return the data as-is.
  456. */
  457. newbuf = emalloc(len + 1);
  458. memcpy(newbuf, s, len);
  459. *newlen = len;
  460. newbuf[*newlen] = '\0';
  461. return newbuf;
  462. }
  463. /* This is the theoretical max (will never get beyond len * 2 as long
  464. * as we are converting from single-byte characters, though) */
  465. newbuf = safe_emalloc(len, 4, 1);
  466. while (pos > 0) {
  467. c = encoder ? encoder((unsigned char)(*s)) : (unsigned short)(*s);
  468. if (c < 0x80) {
  469. newbuf[(*newlen)++] = (char) c;
  470. } else if (c < 0x800) {
  471. newbuf[(*newlen)++] = (0xc0 | (c >> 6));
  472. newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
  473. } else if (c < 0x10000) {
  474. newbuf[(*newlen)++] = (0xe0 | (c >> 12));
  475. newbuf[(*newlen)++] = (0xc0 | ((c >> 6) & 0x3f));
  476. newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
  477. } else if (c < 0x200000) {
  478. newbuf[(*newlen)++] = (0xf0 | (c >> 18));
  479. newbuf[(*newlen)++] = (0xe0 | ((c >> 12) & 0x3f));
  480. newbuf[(*newlen)++] = (0xc0 | ((c >> 6) & 0x3f));
  481. newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
  482. }
  483. pos--;
  484. s++;
  485. }
  486. newbuf[*newlen] = 0;
  487. newbuf = erealloc(newbuf, (*newlen)+1);
  488. return newbuf;
  489. }
  490. /* }}} */
  491. /* {{{ xml_utf8_decode */
  492. PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
  493. {
  494. int pos = len;
  495. char *newbuf = emalloc(len + 1);
  496. unsigned short c;
  497. char (*decoder)(unsigned short) = NULL;
  498. xml_encoding *enc = xml_get_encoding(encoding);
  499. *newlen = 0;
  500. if (enc) {
  501. decoder = enc->decoding_function;
  502. }
  503. if (decoder == NULL) {
  504. /* If the target encoding was unknown, or no decoder function
  505. * was specified, return the UTF-8-encoded data as-is.
  506. */
  507. memcpy(newbuf, s, len);
  508. *newlen = len;
  509. newbuf[*newlen] = '\0';
  510. return newbuf;
  511. }
  512. while (pos > 0) {
  513. c = (unsigned char)(*s);
  514. if (c >= 0xf0) { /* four bytes encoded, 21 bits */
  515. c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
  516. s += 4;
  517. pos -= 4;
  518. } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
  519. c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
  520. s += 3;
  521. pos -= 3;
  522. } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
  523. c = ((s[0]&63)<<6) | (s[1]&63);
  524. s += 2;
  525. pos -= 2;
  526. } else {
  527. s++;
  528. pos--;
  529. }
  530. newbuf[*newlen] = decoder ? decoder(c) : c;
  531. ++*newlen;
  532. }
  533. if (*newlen < len) {
  534. newbuf = erealloc(newbuf, *newlen + 1);
  535. }
  536. newbuf[*newlen] = '\0';
  537. return newbuf;
  538. }
  539. /* }}} */
  540. /* {{{ _xml_xmlcharlen() */
  541. static int _xml_xmlcharlen(const XML_Char *s)
  542. {
  543. int len = 0;
  544. while (*s) {
  545. len++;
  546. s++;
  547. }
  548. return len;
  549. }
  550. /* }}} */
  551. /* {{{ _xml_zval_strdup() */
  552. PHPAPI char *_xml_zval_strdup(zval *val)
  553. {
  554. if (Z_TYPE_P(val) == IS_STRING) {
  555. char *buf = emalloc(Z_STRLEN_P(val) + 1);
  556. memcpy(buf, Z_STRVAL_P(val), Z_STRLEN_P(val));
  557. buf[Z_STRLEN_P(val)] = '\0';
  558. return buf;
  559. }
  560. return NULL;
  561. }
  562. /* }}} */
  563. /* {{{ _xml_add_to_info */
  564. static void _xml_add_to_info(xml_parser *parser,char *name)
  565. {
  566. zval **element, *values;
  567. if (! parser->info) {
  568. return;
  569. }
  570. if (zend_hash_find(Z_ARRVAL_P(parser->info),name,strlen(name) + 1,(void **) &element) == FAILURE) {
  571. MAKE_STD_ZVAL(values);
  572. array_init(values);
  573. zend_hash_update(Z_ARRVAL_P(parser->info), name, strlen(name)+1, (void *) &values, sizeof(zval*), (void **) &element);
  574. }
  575. add_next_index_long(*element,parser->curtag);
  576. parser->curtag++;
  577. }
  578. /* }}} */
  579. /* {{{ _xml_decode_tag() */
  580. static char *_xml_decode_tag(xml_parser *parser, const char *tag)
  581. {
  582. char *newstr;
  583. int out_len;
  584. newstr = xml_utf8_decode(tag, strlen(tag), &out_len, parser->target_encoding);
  585. if (parser->case_folding) {
  586. php_strtoupper(newstr, out_len);
  587. }
  588. return newstr;
  589. }
  590. /* }}} */
  591. /* {{{ _xml_startElementHandler() */
  592. void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
  593. {
  594. xml_parser *parser = (xml_parser *)userData;
  595. const char **attrs = (const char **) attributes;
  596. char *tag_name;
  597. char *att, *val;
  598. int val_len;
  599. zval *retval, *args[3];
  600. if (parser) {
  601. parser->level++;
  602. tag_name = _xml_decode_tag(parser, name);
  603. if (parser->startElementHandler) {
  604. args[0] = _xml_resource_zval(parser->index);
  605. args[1] = _xml_string_zval(tag_name);
  606. MAKE_STD_ZVAL(args[2]);
  607. array_init(args[2]);
  608. while (attributes && *attributes) {
  609. att = _xml_decode_tag(parser, attributes[0]);
  610. val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding);
  611. add_assoc_stringl(args[2], att, val, val_len, 0);
  612. attributes += 2;
  613. efree(att);
  614. }
  615. if ((retval = xml_call_handler(parser, parser->startElementHandler, parser->startElementPtr, 3, args))) {
  616. zval_ptr_dtor(&retval);
  617. }
  618. }
  619. if (parser->data) {
  620. zval *tag, *atr;
  621. int atcnt = 0;
  622. MAKE_STD_ZVAL(tag);
  623. MAKE_STD_ZVAL(atr);
  624. array_init(tag);
  625. array_init(atr);
  626. _xml_add_to_info(parser,((char *) tag_name) + parser->toffset);
  627. add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */
  628. add_assoc_string(tag,"type","open",1);
  629. add_assoc_long(tag,"level",parser->level);
  630. parser->ltags[parser->level-1] = estrdup(tag_name);
  631. parser->lastwasopen = 1;
  632. attributes = (const XML_Char **) attrs;
  633. while (attributes && *attributes) {
  634. att = _xml_decode_tag(parser, attributes[0]);
  635. val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding);
  636. add_assoc_stringl(atr,att,val,val_len,0);
  637. atcnt++;
  638. attributes += 2;
  639. efree(att);
  640. }
  641. if (atcnt) {
  642. zend_hash_add(Z_ARRVAL_P(tag),"attributes",sizeof("attributes"),&atr,sizeof(zval*),NULL);
  643. } else {
  644. zval_ptr_dtor(&atr);
  645. }
  646. zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),(void *) &parser->ctag);
  647. }
  648. efree(tag_name);
  649. }
  650. }
  651. /* }}} */
  652. /* {{{ _xml_endElementHandler() */
  653. void _xml_endElementHandler(void *userData, const XML_Char *name)
  654. {
  655. xml_parser *parser = (xml_parser *)userData;
  656. char *tag_name;
  657. if (parser) {
  658. zval *retval, *args[2];
  659. tag_name = _xml_decode_tag(parser, name);
  660. if (parser->endElementHandler) {
  661. args[0] = _xml_resource_zval(parser->index);
  662. args[1] = _xml_string_zval(tag_name);
  663. if ((retval = xml_call_handler(parser, parser->endElementHandler, parser->endElementPtr, 2, args))) {
  664. zval_ptr_dtor(&retval);
  665. }
  666. }
  667. if (parser->data) {
  668. zval *tag;
  669. if (parser->lastwasopen) {
  670. add_assoc_string(*(parser->ctag),"type","complete",1);
  671. } else {
  672. MAKE_STD_ZVAL(tag);
  673. array_init(tag);
  674. _xml_add_to_info(parser,((char *) tag_name) + parser->toffset);
  675. add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */
  676. add_assoc_string(tag,"type","close",1);
  677. add_assoc_long(tag,"level",parser->level);
  678. zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),NULL);
  679. }
  680. parser->lastwasopen = 0;
  681. }
  682. efree(tag_name);
  683. if (parser->ltags) {
  684. efree(parser->ltags[parser->level-1]);
  685. }
  686. parser->level--;
  687. }
  688. }
  689. /* }}} */
  690. /* {{{ _xml_characterDataHandler() */
  691. void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
  692. {
  693. xml_parser *parser = (xml_parser *)userData;
  694. if (parser) {
  695. zval *retval, *args[2];
  696. if (parser->characterDataHandler) {
  697. args[0] = _xml_resource_zval(parser->index);
  698. args[1] = _xml_xmlchar_zval(s, len, parser->target_encoding);
  699. if ((retval = xml_call_handler(parser, parser->characterDataHandler, parser->characterDataPtr, 2, args))) {
  700. zval_ptr_dtor(&retval);
  701. }
  702. }
  703. if (parser->data) {
  704. int i;
  705. int doprint = 0;
  706. char *decoded_value;
  707. int decoded_len;
  708. decoded_value = xml_utf8_decode(s,len,&decoded_len,parser->target_encoding);
  709. for (i = 0; i < decoded_len; i++) {
  710. switch (decoded_value[i]) {
  711. case ' ':
  712. case '\t':
  713. case '\n':
  714. continue;
  715. default:
  716. doprint = 1;
  717. break;
  718. }
  719. if (doprint) {
  720. break;
  721. }
  722. }
  723. if (doprint || (! parser->skipwhite)) {
  724. if (parser->lastwasopen) {
  725. zval **myval;
  726. /* check if the current tag already has a value - if yes append to that! */
  727. if (zend_hash_find(Z_ARRVAL_PP(parser->ctag),"value",sizeof("value"),(void **) &myval) == SUCCESS) {
  728. int newlen = Z_STRLEN_PP(myval) + decoded_len;
  729. Z_STRVAL_PP(myval) = erealloc(Z_STRVAL_PP(myval),newlen+1);
  730. strcpy(Z_STRVAL_PP(myval) + Z_STRLEN_PP(myval),decoded_value);
  731. Z_STRLEN_PP(myval) += decoded_len;
  732. efree(decoded_value);
  733. } else {
  734. add_assoc_string(*(parser->ctag),"value",decoded_value,0);
  735. }
  736. } else {
  737. zval *tag;
  738. MAKE_STD_ZVAL(tag);
  739. array_init(tag);
  740. _xml_add_to_info(parser,parser->ltags[parser->level-1] + parser->toffset);
  741. add_assoc_string(tag,"tag",parser->ltags[parser->level-1] + parser->toffset,1);
  742. add_assoc_string(tag,"value",decoded_value,0);
  743. add_assoc_string(tag,"type","cdata",1);
  744. add_assoc_long(tag,"level",parser->level);
  745. zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),NULL);
  746. }
  747. } else {
  748. efree(decoded_value);
  749. }
  750. }
  751. }
  752. }
  753. /* }}} */
  754. /* {{{ _xml_processingInstructionHandler() */
  755. void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
  756. {
  757. xml_parser *parser = (xml_parser *)userData;
  758. if (parser && parser->processingInstructionHandler) {
  759. zval *retval, *args[3];
  760. args[0] = _xml_resource_zval(parser->index);
  761. args[1] = _xml_xmlchar_zval(target, 0, parser->target_encoding);
  762. args[2] = _xml_xmlchar_zval(data, 0, parser->target_encoding);
  763. if ((retval = xml_call_handler(parser, parser->processingInstructionHandler, parser->processingInstructionPtr, 3, args))) {
  764. zval_ptr_dtor(&retval);
  765. }
  766. }
  767. }
  768. /* }}} */
  769. /* {{{ _xml_defaultHandler() */
  770. void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
  771. {
  772. xml_parser *parser = (xml_parser *)userData;
  773. if (parser && parser->defaultHandler) {
  774. zval *retval, *args[2];
  775. args[0] = _xml_resource_zval(parser->index);
  776. args[1] = _xml_xmlchar_zval(s, len, parser->target_encoding);
  777. if ((retval = xml_call_handler(parser, parser->defaultHandler, parser->defaultPtr, 2, args))) {
  778. zval_ptr_dtor(&retval);
  779. }
  780. }
  781. }
  782. /* }}} */
  783. /* {{{ _xml_unparsedEntityDeclHandler() */
  784. void _xml_unparsedEntityDeclHandler(void *userData,
  785. const XML_Char *entityName,
  786. const XML_Char *base,
  787. const XML_Char *systemId,
  788. const XML_Char *publicId,
  789. const XML_Char *notationName)
  790. {
  791. xml_parser *parser = (xml_parser *)userData;
  792. if (parser && parser->unparsedEntityDeclHandler) {
  793. zval *retval, *args[6];
  794. args[0] = _xml_resource_zval(parser->index);
  795. args[1] = _xml_xmlchar_zval(entityName, 0, parser->target_encoding);
  796. args[2] = _xml_xmlchar_zval(base, 0, parser->target_encoding);
  797. args[3] = _xml_xmlchar_zval(systemId, 0, parser->target_encoding);
  798. args[4] = _xml_xmlchar_zval(publicId, 0, parser->target_encoding);
  799. args[5] = _xml_xmlchar_zval(notationName, 0, parser->target_encoding);
  800. if ((retval = xml_call_handler(parser, parser->unparsedEntityDeclHandler, parser->unparsedEntityDeclPtr, 6, args))) {
  801. zval_ptr_dtor(&retval);
  802. }
  803. }
  804. }
  805. /* }}} */
  806. /* {{{ _xml_notationDeclHandler() */
  807. void _xml_notationDeclHandler(void *userData,
  808. const XML_Char *notationName,
  809. const XML_Char *base,
  810. const XML_Char *systemId,
  811. const XML_Char *publicId)
  812. {
  813. xml_parser *parser = (xml_parser *)userData;
  814. if (parser && parser->notationDeclHandler) {
  815. zval *retval, *args[5];
  816. args[0] = _xml_resource_zval(parser->index);
  817. args[1] = _xml_xmlchar_zval(notationName, 0, parser->target_encoding);
  818. args[2] = _xml_xmlchar_zval(base, 0, parser->target_encoding);
  819. args[3] = _xml_xmlchar_zval(systemId, 0, parser->target_encoding);
  820. args[4] = _xml_xmlchar_zval(publicId, 0, parser->target_encoding);
  821. if ((retval = xml_call_handler(parser, parser->notationDeclHandler, parser->notationDeclPtr, 5, args))) {
  822. zval_ptr_dtor(&retval);
  823. }
  824. }
  825. }
  826. /* }}} */
  827. /* {{{ _xml_externalEntityRefHandler() */
  828. int _xml_externalEntityRefHandler(XML_Parser parserPtr,
  829. const XML_Char *openEntityNames,
  830. const XML_Char *base,
  831. const XML_Char *systemId,
  832. const XML_Char *publicId)
  833. {
  834. xml_parser *parser = XML_GetUserData(parserPtr);
  835. int ret = 0; /* abort if no handler is set (should be configurable?) */
  836. if (parser && parser->externalEntityRefHandler) {
  837. zval *retval, *args[5];
  838. args[0] = _xml_resource_zval(parser->index);
  839. args[1] = _xml_xmlchar_zval(openEntityNames, 0, parser->target_encoding);
  840. args[2] = _xml_xmlchar_zval(base, 0, parser->target_encoding);
  841. args[3] = _xml_xmlchar_zval(systemId, 0, parser->target_encoding);
  842. args[4] = _xml_xmlchar_zval(publicId, 0, parser->target_encoding);
  843. if ((retval = xml_call_handler(parser, parser->externalEntityRefHandler, parser->externalEntityRefPtr, 5, args))) {
  844. convert_to_long(retval);
  845. ret = Z_LVAL_P(retval);
  846. efree(retval);
  847. } else {
  848. ret = 0;
  849. }
  850. }
  851. return ret;
  852. }
  853. /* }}} */
  854. /* {{{ _xml_startNamespaceDeclHandler() */
  855. void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
  856. {
  857. xml_parser *parser = (xml_parser *)userData;
  858. if (parser && parser->startNamespaceDeclHandler) {
  859. zval *retval, *args[3];
  860. args[0] = _xml_resource_zval(parser->index);
  861. args[1] = _xml_xmlchar_zval(prefix, 0, parser->target_encoding);
  862. args[2] = _xml_xmlchar_zval(uri, 0, parser->target_encoding);
  863. if ((retval = xml_call_handler(parser, parser->startNamespaceDeclHandler, parser->startNamespaceDeclPtr, 3, args))) {
  864. zval_ptr_dtor(&retval);
  865. }
  866. }
  867. }
  868. /* }}} */
  869. /* {{{ _xml_endNamespaceDeclHandler() */
  870. void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
  871. {
  872. xml_parser *parser = (xml_parser *)userData;
  873. if (parser && parser->endNamespaceDeclHandler) {
  874. zval *retval, *args[2];
  875. args[0] = _xml_resource_zval(parser->index);
  876. args[1] = _xml_xmlchar_zval(prefix, 0, parser->target_encoding);
  877. if ((retval = xml_call_handler(parser, parser->endNamespaceDeclHandler, parser->endNamespaceDeclPtr, 2, args))) {
  878. zval_ptr_dtor(&retval);
  879. }
  880. }
  881. }
  882. /* }}} */
  883. /************************* EXTENSION FUNCTIONS *************************/
  884. static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support)
  885. {
  886. xml_parser *parser;
  887. int auto_detect = 0;
  888. char *encoding_param = NULL;
  889. int encoding_param_len = 0;
  890. char *ns_param = NULL;
  891. int ns_param_len = 0;
  892. XML_Char *encoding;
  893. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, (ns_support ? "|ss": "|s"), &encoding_param, &encoding_param_len, &ns_param, &ns_param_len) == FAILURE) {
  894. RETURN_FALSE;
  895. }
  896. if (encoding_param != NULL) {
  897. /* The supported encoding types are hardcoded here because
  898. * we are limited to the encodings supported by expat/xmltok.
  899. */
  900. if (encoding_param_len == 0) {
  901. encoding = XML(default_encoding);
  902. auto_detect = 1;
  903. } else if (strcasecmp(encoding_param, "ISO-8859-1") == 0) {
  904. encoding = "ISO-8859-1";
  905. } else if (strcasecmp(encoding_param, "UTF-8") == 0) {
  906. encoding = "UTF-8";
  907. } else if (strcasecmp(encoding_param, "US-ASCII") == 0) {
  908. encoding = "US-ASCII";
  909. } else {
  910. php_error_docref(NULL TSRMLS_CC, E_WARNING, "unsupported source encoding \"%s\"", encoding_param);
  911. RETURN_FALSE;
  912. }
  913. } else {
  914. encoding = XML(default_encoding);
  915. }
  916. if (ns_support && ns_param == NULL){
  917. ns_param = ":";
  918. }
  919. parser = ecalloc(sizeof(xml_parser), 1);
  920. parser->parser = XML_ParserCreate_MM((auto_detect ? NULL : encoding),
  921. &php_xml_mem_hdlrs, ns_param);
  922. parser->target_encoding = encoding;
  923. parser->case_folding = 1;
  924. parser->object = NULL;
  925. XML_SetUserData(parser->parser, parser);
  926. ZEND_REGISTER_RESOURCE(return_value, parser,le_xml_parser);
  927. parser->index = Z_LVAL_P(return_value);
  928. }
/* {{{ proto resource xml_parser_create([string encoding])
   Create an XML parser without namespace support. Thin wrapper that passes
   its arguments through to php_xml_parser_create_impl() with ns_support = 0 */
PHP_FUNCTION(xml_parser_create)
{
	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
  935. /* }}} */
/* {{{ proto resource xml_parser_create_ns([string encoding [, string sep]])
   Create an XML parser with namespace support. Thin wrapper that passes its
   arguments through to php_xml_parser_create_impl() with ns_support = 1 */
PHP_FUNCTION(xml_parser_create_ns)
{
	php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
  942. /* }}} */
/* {{{ proto bool xml_set_object(resource parser, object &obj)
   Set up object which should be used for callbacks.
   Requires exactly two arguments. Emits a warning and returns FALSE when the
   second argument is not an object; otherwise stores it in parser->object
   and returns TRUE. */
PHP_FUNCTION(xml_set_object)
{
	xml_parser *parser;
	zval **pind, **mythis;

	if (ZEND_NUM_ARGS() != 2 ||
		zend_get_parameters_ex(2, &pind, &mythis) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	/* the type check happens before the resource lookup, so a non-object
	   argument is reported even if the parser handle is also invalid */
	if (Z_TYPE_PP(mythis) != IS_OBJECT) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument 2 has wrong type");
		RETURN_FALSE;
	}

	ZEND_FETCH_RESOURCE(parser,xml_parser *,pind, -1, "XML Parser", le_xml_parser);

	/* please leave this commented - or ask thies@thieso.net before doing it (again) */
	/* NOTE(review): the block below is active code, not commented out. Under
	   ZEND_ENGINE_2 it releases a previously stored object, while the matching
	   zval_add_ref() further down is disabled - confirm the reference
	   counting is balanced before touching either side. */
#ifdef ZEND_ENGINE_2
	if (parser->object) {
		zval_ptr_dtor(&parser->object);
	}
#endif

	/* the zval pointer is stored as-is; no copy is made and no reference is added */
	parser->object = *mythis;

	/* please leave this commented - or ask thies@thieso.net before doing it (again) */
	/* #ifdef ZEND_ENGINE_2
	zval_add_ref(&parser->object);
#endif */

	RETVAL_TRUE;
}
  971. /* }}} */
  972. /* {{{ proto int xml_set_element_handler(resource parser, string shdl, string ehdl)
  973. Set up start and end element handlers */
  974. PHP_FUNCTION(xml_set_element_handler)
  975. {
  976. xml_parser *parser;
  977. zval **pind, **shdl, **ehdl;
  978. if (ZEND_NUM_ARGS() != 3 ||
  979. zend_get_parameters_ex(3, &pind, &shdl, &ehdl) == FAILURE) {
  980. WRONG_PARAM_COUNT;
  981. }
  982. ZEND_FETCH_RESOURCE(parser,xml_parser *,pind, -1, "XML Parser", le_xml_parser);
  983. xml_set_handler(&parser->startElementHandler, shdl);
  984. xml_set_handler(&parser->endElementHandler, ehdl);
  985. XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
  986. RETVAL_TRUE;
  987. }
  988. /* }}} */
  989. /* {{{ proto int xml_set_character_data_handler(resource parser, string hdl)
  990. Set up character data handler */
  991. PHP_FUNCTION(xml_set_character_data_handler)
  992. {
  993. xml_parser *parser;
  994. zval **pind, **hdl;
  995. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  996. WRONG_PARAM_COUNT;
  997. }
  998. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  999. xml_set_handler(&parser->characterDataHandler, hdl);
  1000. XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
  1001. RETVAL_TRUE;
  1002. }
  1003. /* }}} */
  1004. /* {{{ proto int xml_set_processing_instruction_handler(resource parser, string hdl)
  1005. Set up processing instruction (PI) handler */
  1006. PHP_FUNCTION(xml_set_processing_instruction_handler)
  1007. {
  1008. xml_parser *parser;
  1009. zval **pind, **hdl;
  1010. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1011. WRONG_PARAM_COUNT;
  1012. }
  1013. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1014. xml_set_handler(&parser->processingInstructionHandler, hdl);
  1015. XML_SetProcessingInstructionHandler(parser->parser, _xml_processingInstructionHandler);
  1016. RETVAL_TRUE;
  1017. }
  1018. /* }}} */
  1019. /* {{{ proto int xml_set_default_handler(resource parser, string hdl)
  1020. Set up default handler */
  1021. PHP_FUNCTION(xml_set_default_handler)
  1022. {
  1023. xml_parser *parser;
  1024. zval **pind, **hdl;
  1025. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1026. WRONG_PARAM_COUNT;
  1027. }
  1028. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1029. xml_set_handler(&parser->defaultHandler, hdl);
  1030. XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
  1031. RETVAL_TRUE;
  1032. }
  1033. /* }}} */
  1034. /* {{{ proto int xml_set_unparsed_entity_decl_handler(resource parser, string hdl)
  1035. Set up unparsed entity declaration handler */
  1036. PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)
  1037. {
  1038. xml_parser *parser;
  1039. zval **pind, **hdl;
  1040. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1041. WRONG_PARAM_COUNT;
  1042. }
  1043. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1044. xml_set_handler(&parser->unparsedEntityDeclHandler, hdl);
  1045. XML_SetUnparsedEntityDeclHandler(parser->parser, _xml_unparsedEntityDeclHandler);
  1046. RETVAL_TRUE;
  1047. }
  1048. /* }}} */
  1049. /* {{{ proto int xml_set_notation_decl_handler(resource parser, string hdl)
  1050. Set up notation declaration handler */
  1051. PHP_FUNCTION(xml_set_notation_decl_handler)
  1052. {
  1053. xml_parser *parser;
  1054. zval **pind, **hdl;
  1055. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1056. WRONG_PARAM_COUNT;
  1057. }
  1058. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1059. xml_set_handler(&parser->notationDeclHandler, hdl);
  1060. XML_SetNotationDeclHandler(parser->parser, _xml_notationDeclHandler);
  1061. RETVAL_TRUE;
  1062. }
  1063. /* }}} */
  1064. /* {{{ proto int xml_set_external_entity_ref_handler(resource parser, string hdl)
  1065. Set up external entity reference handler */
  1066. PHP_FUNCTION(xml_set_external_entity_ref_handler)
  1067. {
  1068. xml_parser *parser;
  1069. zval **pind, **hdl;
  1070. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1071. WRONG_PARAM_COUNT;
  1072. }
  1073. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1074. xml_set_handler(&parser->externalEntityRefHandler, hdl);
  1075. XML_SetExternalEntityRefHandler(parser->parser, (void *) _xml_externalEntityRefHandler);
  1076. RETVAL_TRUE;
  1077. }
  1078. /* }}} */
  1079. /* {{{ proto int xml_set_start_namespace_decl_handler(resource parser, string hdl)
  1080. Set up character data handler */
  1081. PHP_FUNCTION(xml_set_start_namespace_decl_handler)
  1082. {
  1083. xml_parser *parser;
  1084. zval **pind, **hdl;
  1085. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1086. WRONG_PARAM_COUNT;
  1087. }
  1088. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1089. xml_set_handler(&parser->startNamespaceDeclHandler, hdl);
  1090. XML_SetStartNamespaceDeclHandler(parser->parser, _xml_startNamespaceDeclHandler);
  1091. RETVAL_TRUE;
  1092. }
  1093. /* }}} */
  1094. /* {{{ proto int xml_set_end_namespace_decl_handler(resource parser, string hdl)
  1095. Set up character data handler */
  1096. PHP_FUNCTION(xml_set_end_namespace_decl_handler)
  1097. {
  1098. xml_parser *parser;
  1099. zval **pind, **hdl;
  1100. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &hdl) == FAILURE) {
  1101. WRONG_PARAM_COUNT;
  1102. }
  1103. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1104. xml_set_handler(&parser->endNamespaceDeclHandler, hdl);
  1105. XML_SetEndNamespaceDeclHandler(parser->parser, _xml_endNamespaceDeclHandler);
  1106. RETVAL_TRUE;
  1107. }
  1108. /* }}} */
  1109. /* {{{ proto int xml_parse(resource parser, string data [, int isFinal])
  1110. Start parsing an XML document */
  1111. PHP_FUNCTION(xml_parse)
  1112. {
  1113. xml_parser *parser;
  1114. zval **pind, **data, **final;
  1115. int argc, isFinal, ret;
  1116. argc = ZEND_NUM_ARGS();
  1117. if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &pind, &data, &final) == FAILURE) {
  1118. WRONG_PARAM_COUNT;
  1119. }
  1120. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1121. convert_to_string_ex(data);
  1122. if (argc == 3) {
  1123. convert_to_long_ex(final);
  1124. isFinal = Z_LVAL_PP(final);
  1125. } else {
  1126. isFinal = 0;
  1127. }
  1128. ret = XML_Parse(parser->parser, Z_STRVAL_PP(data), Z_STRLEN_PP(data), isFinal);
  1129. RETVAL_LONG(ret);
  1130. }
  1131. /* }}} */
  1132. /* {{{ proto int xml_parse_into_struct(resource parser, string data, array &struct, array &index)
  1133. Parsing a XML document */
  1134. PHP_FUNCTION(xml_parse_into_struct)
  1135. {
  1136. xml_parser *parser;
  1137. zval **pind, **data, **xdata, **info = 0;
  1138. int argc, ret;
  1139. argc = ZEND_NUM_ARGS();
  1140. if (zend_get_parameters_ex(4, &pind, &data, &xdata,&info) == SUCCESS) {
  1141. zval_dtor(*info);
  1142. array_init(*info);
  1143. } else if (zend_get_parameters_ex(3, &pind, &data, &xdata) == FAILURE) {
  1144. WRONG_PARAM_COUNT;
  1145. }
  1146. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1147. convert_to_string_ex(data);
  1148. zval_dtor(*xdata);
  1149. array_init(*xdata);
  1150. parser->data = *xdata;
  1151. if (info)
  1152. parser->info = *info;
  1153. parser->level = 0;
  1154. parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
  1155. XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
  1156. XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
  1157. XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
  1158. ret = XML_Parse(parser->parser, Z_STRVAL_PP(data), Z_STRLEN_PP(data), 1);
  1159. RETVAL_LONG(ret);
  1160. }
  1161. /* }}} */
  1162. /* {{{ proto int xml_get_error_code(resource parser)
  1163. Get XML parser error code */
  1164. PHP_FUNCTION(xml_get_error_code)
  1165. {
  1166. xml_parser *parser;
  1167. zval **pind;
  1168. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &pind) == FAILURE) {
  1169. WRONG_PARAM_COUNT;
  1170. }
  1171. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1172. RETVAL_LONG((long)XML_GetErrorCode(parser->parser));
  1173. }
  1174. /* }}} */
  1175. /* {{{ proto string xml_error_string(int code)
  1176. Get XML parser error string */
  1177. PHP_FUNCTION(xml_error_string)
  1178. {
  1179. zval **code;
  1180. char *str;
  1181. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &code) == FAILURE) {
  1182. WRONG_PARAM_COUNT;
  1183. }
  1184. convert_to_long_ex(code);
  1185. str = (char *)XML_ErrorString((int)Z_LVAL_PP(code));
  1186. if (str) {
  1187. RETVAL_STRING(str, 1);
  1188. }
  1189. }
  1190. /* }}} */
  1191. /* {{{ proto int xml_get_current_line_number(resource parser)
  1192. Get current line number for an XML parser */
  1193. PHP_FUNCTION(xml_get_current_line_number)
  1194. {
  1195. xml_parser *parser;
  1196. zval **pind;
  1197. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &pind) == FAILURE) {
  1198. WRONG_PARAM_COUNT;
  1199. }
  1200. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1201. RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
  1202. }
  1203. /* }}} */
  1204. /* {{{ proto int xml_get_current_column_number(resource parser)
  1205. Get current column number for an XML parser */
  1206. PHP_FUNCTION(xml_get_current_column_number)
  1207. {
  1208. xml_parser *parser;
  1209. zval **pind;
  1210. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &pind) == FAILURE) {
  1211. WRONG_PARAM_COUNT;
  1212. }
  1213. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1214. RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
  1215. }
  1216. /* }}} */
  1217. /* {{{ proto int xml_get_current_byte_index(resource parser)
  1218. Get current byte index for an XML parser */
  1219. PHP_FUNCTION(xml_get_current_byte_index)
  1220. {
  1221. xml_parser *parser;
  1222. zval **pind;
  1223. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &pind) == FAILURE) {
  1224. WRONG_PARAM_COUNT;
  1225. }
  1226. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1227. RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
  1228. }
  1229. /* }}} */
  1230. /* {{{ proto int xml_parser_free(resource parser)
  1231. Free an XML parser */
  1232. PHP_FUNCTION(xml_parser_free)
  1233. {
  1234. zval **pind;
  1235. xml_parser *parser;
  1236. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &pind) == FAILURE) {
  1237. WRONG_PARAM_COUNT;
  1238. }
  1239. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1240. if (zend_list_delete(parser->index) == FAILURE) {
  1241. RETURN_FALSE;
  1242. }
  1243. RETVAL_TRUE;
  1244. }
  1245. /* }}} */
  1246. /* {{{ proto int xml_parser_set_option(resource parser, int option, mixed value)
  1247. Set options in an XML parser */
  1248. PHP_FUNCTION(xml_parser_set_option)
  1249. {
  1250. xml_parser *parser;
  1251. zval **pind, **opt, **val;
  1252. if (ZEND_NUM_ARGS() != 3 || zend_get_parameters_ex(3, &pind, &opt, &val) == FAILURE) {
  1253. WRONG_PARAM_COUNT;
  1254. }
  1255. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1256. convert_to_long_ex(opt);
  1257. switch (Z_LVAL_PP(opt)) {
  1258. case PHP_XML_OPTION_CASE_FOLDING:
  1259. convert_to_long_ex(val);
  1260. parser->case_folding = Z_LVAL_PP(val);
  1261. break;
  1262. case PHP_XML_OPTION_SKIP_TAGSTART:
  1263. convert_to_long_ex(val);
  1264. parser->toffset = Z_LVAL_PP(val);
  1265. break;
  1266. case PHP_XML_OPTION_SKIP_WHITE:
  1267. convert_to_long_ex(val);
  1268. parser->skipwhite = Z_LVAL_PP(val);
  1269. break;
  1270. case PHP_XML_OPTION_TARGET_ENCODING: {
  1271. xml_encoding *enc;
  1272. convert_to_string_ex(val);
  1273. enc = xml_get_encoding(Z_STRVAL_PP(val));
  1274. if (enc == NULL) {
  1275. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported target encoding \"%s\"", Z_STRVAL_PP(val));
  1276. RETURN_FALSE;
  1277. }
  1278. parser->target_encoding = enc->name;
  1279. break;
  1280. }
  1281. default:
  1282. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown option");
  1283. RETURN_FALSE;
  1284. break;
  1285. }
  1286. RETVAL_TRUE;
  1287. }
  1288. /* }}} */
  1289. /* {{{ proto int xml_parser_get_option(resource parser, int option)
  1290. Get options from an XML parser */
  1291. PHP_FUNCTION(xml_parser_get_option)
  1292. {
  1293. xml_parser *parser;
  1294. zval **pind, **opt;
  1295. if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &pind, &opt) == FAILURE) {
  1296. WRONG_PARAM_COUNT;
  1297. }
  1298. ZEND_FETCH_RESOURCE(parser,xml_parser *, pind, -1, "XML Parser", le_xml_parser);
  1299. convert_to_long_ex(opt);
  1300. switch (Z_LVAL_PP(opt)) {
  1301. case PHP_XML_OPTION_CASE_FOLDING:
  1302. RETURN_LONG(parser->case_folding);
  1303. break;
  1304. case PHP_XML_OPTION_TARGET_ENCODING:
  1305. RETURN_STRING(parser->target_encoding, 1);
  1306. break;
  1307. default:
  1308. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown option");
  1309. RETURN_FALSE;
  1310. break;
  1311. }
  1312. RETVAL_FALSE; /* never reached */
  1313. }
  1314. /* }}} */
  1315. /* {{{ proto string utf8_encode(string data)
  1316. Encodes an ISO-8859-1 string to UTF-8 */
  1317. PHP_FUNCTION(utf8_encode)
  1318. {
  1319. zval **arg;
  1320. XML_Char *encoded;
  1321. int len;
  1322. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
  1323. WRONG_PARAM_COUNT;
  1324. }
  1325. convert_to_string_ex(arg);
  1326. encoded = xml_utf8_encode(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg), &len, "ISO-8859-1");
  1327. if (encoded == NULL) {
  1328. RETURN_FALSE;
  1329. }
  1330. RETVAL_STRINGL(encoded, len, 0);
  1331. }
  1332. /* }}} */
  1333. /* {{{ proto string utf8_decode(string data)
  1334. Converts a UTF-8 encoded string to ISO-8859-1 */
  1335. PHP_FUNCTION(utf8_decode)
  1336. {
  1337. zval **arg;
  1338. XML_Char *decoded;
  1339. int len;
  1340. if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg) == FAILURE) {
  1341. WRONG_PARAM_COUNT;
  1342. }
  1343. convert_to_string_ex(arg);
  1344. decoded = xml_utf8_decode(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg), &len, "ISO-8859-1");
  1345. if (decoded == NULL) {
  1346. RETURN_FALSE;
  1347. }
  1348. RETVAL_STRINGL(decoded, len, 0);
  1349. }
  1350. /* }}} */
  1351. #endif
  1352. /*
  1353. * Local variables:
  1354. * tab-width: 4
  1355. * c-basic-offset: 4
  1356. * End:
  1357. * vim600: sw=4 ts=4 fdm=marker
  1358. * vim<600: sw=4 ts=4
  1359. */