You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

339 lines
10 KiB

11 years ago
  1. /*****************************************************************************
  2. Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
  3. This program is free software; you can redistribute it and/or modify it under
  4. the terms of the GNU General Public License as published by the Free Software
  5. Foundation; version 2 of the License.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. You should have received a copy of the GNU General Public License along with
  10. this program; if not, write to the Free Software Foundation, Inc.,
  11. 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
  12. *****************************************************************************/
  13. /******************************************************************//**
  14. @file include/fts0ast.h
  15. The FTS query parser (AST) abstract syntax tree routines
  16. Created 2007/03/16/03 Sunny Bains
  17. *******************************************************/
  18. #ifndef INNOBASE_FST0AST_H
  19. #define INNOBASE_FST0AST_H
  20. #include "mem0mem.h"
  21. #include "ha_prototypes.h"
  22. /* The kinds of node that can appear in the FTS query AST */
  23. enum fts_ast_type_t {
  24. FTS_AST_OPER, /*!< Operator */
  25. FTS_AST_NUMB, /*!< Number (see fts_ast_string_to_ul()) */
  26. FTS_AST_TERM, /*!< Term (or word) */
  27. FTS_AST_TEXT, /*!< Text string */
  28. FTS_AST_LIST, /*!< Expression list */
  29. FTS_AST_SUBEXP_LIST /*!< Sub-expression list */
  30. };
  31. /* The FTS query operators that we support */
  32. enum fts_ast_oper_t {
  33. FTS_NONE, /*!< No operator */
  34. FTS_IGNORE, /*!< Ignore rows that contain
  35. this word */
  36. FTS_EXIST, /*!< Include rows that contain
  37. this word */
  38. FTS_NEGATE, /*!< Include rows that contain
  39. this word but rank them
  40. lower */
  41. FTS_INCR_RATING, /*!< Increase the rank for this
  42. word */
  43. FTS_DECR_RATING, /*!< Decrease the rank for this
  44. word */
  45. FTS_DISTANCE, /*!< Proximity distance */
  46. FTS_IGNORE_SKIP, /*!< Transient node operator:
  47. signifies that this is a
  48. FTS_IGNORE node, and is ignored in
  49. the first pass of
  50. fts_ast_visit() */
  51. FTS_EXIST_SKIP /*!< Transient node operator:
  52. signifies that this is a
  53. FTS_EXIST node, and is ignored in
  54. the first pass of
  55. fts_ast_visit() */
  56. };
  57. /* Forward declarations of the data types used by the FTS parser */
  58. struct fts_lexer_t;
  59. struct fts_ast_node_t;
  60. struct fts_ast_state_t;
  61. struct fts_ast_string_t;
  62. typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); /*!< Visitor callback type taken by fts_ast_visit() and fts_ast_visit_sub_exp() */
  63. /********************************************************************
  64. Parse the string using the lexer set up within state (state->lexer).*/
  65. int
  66. fts_parse(
  67. /*======*/
  68. /* out: 0 on OK, 1 on error */
  69. fts_ast_state_t* state); /*!< in: ast state instance */
  70. /********************************************************************
  71. Create an AST operator node for the given operator */
  72. extern
  73. fts_ast_node_t*
  74. fts_ast_create_node_oper(
  75. /*=====================*/
  76. void* arg, /*!< in: ast state, passed as an untyped pointer */
  77. fts_ast_oper_t oper); /*!< in: ast operator */
  78. /********************************************************************
  79. Create an AST term node; the node makes its own copy of ptr */
  80. extern
  81. fts_ast_node_t*
  82. fts_ast_create_node_term(
  83. /*=====================*/
  84. void* arg, /*!< in: ast state, passed as an untyped pointer */
  85. const fts_ast_string_t* ptr); /*!< in: term string */
  86. /********************************************************************
  87. Create an AST text node from the given text string */
  88. extern
  89. fts_ast_node_t*
  90. fts_ast_create_node_text(
  91. /*=====================*/
  92. void* arg, /*!< in: ast state, passed as an untyped pointer */
  93. const fts_ast_string_t* ptr); /*!< in: text string */
  94. /********************************************************************
  95. Create an AST expression list node */
  96. extern
  97. fts_ast_node_t*
  98. fts_ast_create_node_list(
  99. /*=====================*/
  100. void* arg, /*!< in: ast state, passed as an untyped pointer */
  101. fts_ast_node_t* expr); /*!< in: ast expr */
  102. /********************************************************************
  103. Create a sub-expression list node. This function takes ownership of
  104. expr and is responsible for deleting it, so callers must not free expr. */
  105. extern
  106. fts_ast_node_t*
  107. fts_ast_create_node_subexp_list(
  108. /*============================*/
  109. /* out: new node */
  110. void* arg, /*!< in: ast state instance, passed as an untyped pointer */
  111. fts_ast_node_t* expr); /*!< in: ast expr instance */
  112. /********************************************************************
  113. Set the wildcard attribute (fts_ast_term_t::wildcard) of a term.*/
  114. extern
  115. void
  116. fts_ast_term_set_wildcard(
  117. /*======================*/
  118. fts_ast_node_t* node); /*!< in/out: term to change */
  119. /********************************************************************
  120. Set the proximity attribute (fts_ast_text_t::distance) of a text node. */
  121. void
  122. fts_ast_term_set_distance(
  123. /*======================*/
  124. fts_ast_node_t* node, /*!< in/out: text node */
  125. ulint distance); /*!< in: the text proximity
  126. distance */
  127. /********************************************************************//**
  128. Free an fts_ast_node_t instance.
  129. @return next node to free */
  130. UNIV_INTERN
  131. fts_ast_node_t*
  132. fts_ast_free_node(
  133. /*==============*/
  134. fts_ast_node_t* node); /*!< in: node to free */
  135. /********************************************************************
  136. Add a sub-expression to an AST list node */
  137. extern
  138. fts_ast_node_t*
  139. fts_ast_add_node(
  140. /*=============*/
  141. fts_ast_node_t* list, /*!< in: list node instance */
  142. fts_ast_node_t* node); /*!< in: (sub) expr to add */
  143. /********************************************************************
  144. Print the AST node and, recursively, the nodes below it.*/
  145. extern
  146. void
  147. fts_ast_node_print(
  148. /*===============*/
  149. fts_ast_node_t* node); /*!< in: ast node to print */
  150. /********************************************************************
  151. For tracking node allocations, in case there is an error during parsing.*/
  152. extern
  153. void
  154. fts_ast_state_add_node(
  155. /*===================*/
  156. fts_ast_state_t*state, /*!< in: ast state instance */
  157. fts_ast_node_t* node); /*!< in: node to add to state */
  158. /********************************************************************
  159. Free the node and expr allocations tracked by state.*/
  160. extern
  161. void
  162. fts_ast_state_free(
  163. /*===============*/
  164. fts_ast_state_t*state); /*!< in: state instance
  165. to free */
  166. /******************************************************************//**
  167. Traverse the AST - in-order traversal.
  168. @return DB_SUCCESS if all went well */
  169. UNIV_INTERN
  170. dberr_t
  171. fts_ast_visit(
  172. /*==========*/
  173. fts_ast_oper_t oper, /*!< in: FTS operator */
  174. fts_ast_node_t* node, /*!< in: instance to traverse */
  175. fts_ast_callback visitor, /*!< in: callback */
  176. void* arg, /*!< in: callback arg */
  177. bool* has_ignore) /*!< out: whether we encountered
  178. an operator and skipped processing
  179. it; currently we only
  180. skip the FTS_IGNORE operator */
  181. __attribute__((nonnull, warn_unused_result));
  182. /*****************************************************************//**
  183. Process a (possibly nested) sub-expression: create a new result set to store
  184. the sub-expression result by processing the nodes under the current sub-expression
  185. list, then merge the sub-expression result with that of the parent expression list.
  186. @return DB_SUCCESS if all went well */
  187. UNIV_INTERN
  188. dberr_t
  189. fts_ast_visit_sub_exp(
  190. /*==================*/
  191. fts_ast_node_t* node, /*!< in: instance to traverse */
  192. fts_ast_callback visitor, /*!< in: callback */
  193. void* arg) /*!< in: callback arg */
  194. __attribute__((nonnull, warn_unused_result));
  195. /********************************************************************
  196. Create a lexer instance for the given query string.*/
  197. UNIV_INTERN
  198. fts_lexer_t*
  199. fts_lexer_create(
  200. /*=============*/
  201. ibool boolean_mode, /*!< in: query type (boolean-mode flag) */
  202. const byte* query, /*!< in: query string */
  203. ulint query_len) /*!< in: query string len */
  204. __attribute__((nonnull, malloc, warn_unused_result));
  205. /********************************************************************
  206. Free an fts_lexer_t instance; the pointer must not be NULL (nonnull).*/
  207. UNIV_INTERN
  208. void
  209. fts_lexer_free(
  210. /*===========*/
  211. fts_lexer_t* fts_lexer) /*!< in: lexer instance to
  212. free */
  213. __attribute__((nonnull));
  214. /**
  215. Create an ast string object with a NUL-terminator, so the stored string
  216. occupies one more byte than len
  217. @param[in] str pointer to string
  218. @param[in] len length of the string
  219. @return ast string with NUL-terminator */
  220. UNIV_INTERN
  221. fts_ast_string_t*
  222. fts_ast_string_create(
  223. const byte* str,
  224. ulint len);
  225. /**
  226. Free an ast string instance
  227. @param[in,out] ast_str ast string instance to free */
  228. UNIV_INTERN
  229. void
  230. fts_ast_string_free(
  231. fts_ast_string_t* ast_str);
  232. /**
  233. Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
  234. @param[in] ast_str string to translate
  235. @param[in] base the base
  236. @return translated number */
  237. UNIV_INTERN
  238. ulint
  239. fts_ast_string_to_ul(
  240. const fts_ast_string_t* ast_str,
  241. int base);
  242. /**
  243. Print the ast string
  244. @param[in] ast_str string to print */
  245. UNIV_INTERN
  246. void
  247. fts_ast_string_print(
  248. const fts_ast_string_t* ast_str);
  249. /* String of length len.
  250. We always store the string of length len with a terminating '\0',
  251. regardless of whether there is any 0x00 in the string itself */
  252. struct fts_ast_string_t {
  253. /** Pointer to string. */
  254. byte* str;
  255. /** Length of the string. */
  256. ulint len;
  257. };
  258. /* Query term type */
  259. struct fts_ast_term_t {
  260. fts_ast_string_t* ptr; /*!< Pointer to term string. */
  261. ibool wildcard; /*!< TRUE if wildcard is set (see fts_ast_term_set_wildcard()). */
  262. };
  263. /* Query text type */
  264. struct fts_ast_text_t {
  265. fts_ast_string_t* ptr; /*!< Pointer to text string. */
  266. ulint distance; /*!< > 0 if a proximity distance is
  267. set (see fts_ast_term_set_distance()) */
  268. };
  269. /* The list of nodes in an expr list, kept as head and tail pointers */
  270. struct fts_ast_list_t {
  271. fts_ast_node_t* head; /*!< Children list head */
  272. fts_ast_node_t* tail; /*!< Children list tail */
  273. };
  274. /* FTS AST node to store the term, text, operator and sub-expressions. */
  275. struct fts_ast_node_t {
  276. fts_ast_type_t type; /*!< The type of node */
  277. fts_ast_text_t text; /*!< Text node */
  278. fts_ast_term_t term; /*!< Term node */
  279. fts_ast_oper_t oper; /*!< Operator value */
  280. fts_ast_list_t list; /*!< Expression list */
  281. fts_ast_node_t* next; /*!< Link for expr list */
  282. fts_ast_node_t* next_alloc; /*!< Link for tracking allocations */
  283. bool visited; /*!< whether this node has
  284. already been processed */
  285. };
  286. /* State tracked during parsing */
  287. struct fts_ast_state_t {
  288. mem_heap_t* heap; /*!< Heap to use for alloc */
  289. fts_ast_node_t* root; /*!< If all goes OK, then this
  290. will point to the root. */
  291. fts_ast_list_t list; /*!< List of nodes allocated */
  292. fts_lexer_t* lexer; /*!< Lexer callback + arg */
  293. CHARSET_INFO* charset; /*!< charset used for
  294. tokenization */
  295. };
  296. #ifdef UNIV_DEBUG
  297. const char*
  298. fts_ast_oper_name_get(fts_ast_oper_t oper); /*!< debug builds only; presumably returns a printable name for oper - confirm against the definition */
  299. const char*
  300. fts_ast_node_type_get(fts_ast_type_t type); /*!< debug builds only; presumably returns a printable name for type - confirm against the definition */
  301. #endif /* UNIV_DEBUG */
  302. #endif /* INNOBASE_FST0AST_H */