You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3115 lines
78 KiB

15 years ago
  1. //
  2. // $Id: ha_sphinx.cc 2058 2009-11-07 04:01:57Z shodan $
  3. //
  4. #ifdef USE_PRAGMA_IMPLEMENTATION
  5. #pragma implementation // gcc: Class implementation
  6. #endif
  7. #if _MSC_VER>=1400
  8. #define _CRT_SECURE_NO_DEPRECATE 1
  9. #define _CRT_NONSTDC_NO_DEPRECATE 1
  10. #endif
  11. #include <mysql_version.h>
  12. #if MYSQL_VERSION_ID>50100
  13. #include "mysql_priv.h"
  14. #include <mysql/plugin.h>
  15. #else
  16. #include "../mysql_priv.h"
  17. #endif
  18. #include <mysys_err.h>
  19. #include <my_sys.h>
  20. #ifndef __WIN__
  21. // UNIX-specific
  22. #include <my_net.h>
  23. #include <netdb.h>
  24. #include <sys/un.h>
  25. #define RECV_FLAGS MSG_WAITALL
  26. #define sphSockClose(_sock) ::close(_sock)
  27. #else
  28. // Windows-specific
  29. #include <io.h>
  30. #define strcasecmp stricmp
  31. #define snprintf _snprintf
  32. #define RECV_FLAGS 0
  33. #define sphSockClose(_sock) ::closesocket(_sock)
  34. #endif
  35. #include <ctype.h>
  36. #include "ha_sphinx.h"
  37. #ifndef MSG_WAITALL
  38. #define MSG_WAITALL 0
  39. #endif
  40. #if _MSC_VER>=1400
  41. #pragma warning(push,4)
  42. #endif
  43. /////////////////////////////////////////////////////////////////////////////
  /// Local replacement for min(), which is reported to be problematic on some
  /// platforms (eg. Gentoo). Being a macro, it evaluates one argument twice.
  #define Min(a,b) ( (a)<(b) ? (a) : (b) )

  /// unaligned RAM accesses are forbidden on SPARC, allowed everywhere else
  #if defined(sparc) || defined(__sparc__)
  #define UNALIGNED_RAM_ACCESS 0
  #else
  #define UNALIGNED_RAM_ACCESS 1
  #endif

  /// shims used when MYSQL_VERSION_ID is below 50100
  #if MYSQL_VERSION_ID<50100
  #define thd_ha_data(X,Y) (X)->ha_data[sphinx_hton.slot]
  #define ha_thd() current_thd
  #endif // <50100

  #if UNALIGNED_RAM_ACCESS

  /// pass-through read: the value is returned as is
  template < typename T >
  inline T sphUnalignedRead ( const T & tRef )
  {
      return tRef;
  }

  /// pass-through write: tVal is assigned through pPtr viewed as T*
  template < typename T >
  void sphUnalignedWrite ( void * pPtr, const T & tVal )
  {
      T * pTyped = (T*) pPtr;
      *pTyped = tVal;
  }

  #else

  /// unaligned read wrapper for some architectures (eg. SPARC);
  /// the value is assembled one byte at a time
  template < typename T >
  inline T sphUnalignedRead ( const T & tRef )
  {
      T uTmp;
      byte * pFrom = (byte *) &tRef;
      byte * pTo = (byte *) &uTmp;
      for ( int iByte=0; iByte<(int)sizeof(T); iByte++ )
          pTo[iByte] = pFrom[iByte];
      return uTmp;
  }

  /// unaligned write wrapper for some architectures (eg. SPARC);
  /// the value is stored one byte at a time
  template < typename T >
  void sphUnalignedWrite ( void * pPtr, const T & tVal )
  {
      byte * pTo = (byte *) pPtr;
      byte * pFrom = (byte *) &tVal;
      for ( int iByte=0; iByte<(int)sizeof(T); iByte++ )
          pTo[iByte] = pFrom[iByte];
  }

  #endif
  89. /////////////////////////////////////////////////////////////////////////////
  // FIXME! make this all dynamic

  /// capacity of the fixed-size filter and weight arrays in CSphSEQuery
  #define SPHINXSE_MAX_FILTERS        32

  /// fallbacks used by ParseUrl() when the connection string omits a part
  #define SPHINXSE_DEFAULT_HOST       "127.0.0.1"
  #define SPHINXSE_DEFAULT_PORT       9312
  #define SPHINXSE_DEFAULT_INDEX      "*"

  #define SPHINXSE_SYSTEM_COLUMNS     3
  #define SPHINXSE_MAX_ALLOC          (16*1024*1024)
  #define SPHINXSE_MAX_KEYWORDSTATS   4096

  // FIXME! all the following is cut-n-paste from sphinx.h and searchd.cpp
  #define SPHINX_VERSION              "0.9.9"

  /// protocol, command and command version numbers
  enum
  {
      SPHINX_SEARCHD_PROTO    = 1,
      SEARCHD_COMMAND_SEARCH  = 0,
      VER_COMMAND_SEARCH      = 0x116
  };
  /// search query sorting orders
  enum ESphSortOrder
  {
      SPH_SORT_RELEVANCE      = 0,    ///< document relevance desc, then date
      SPH_SORT_ATTR_DESC      = 1,    ///< document date desc, then relevance desc
      SPH_SORT_ATTR_ASC       = 2,    ///< document date asc, then relevance desc
      SPH_SORT_TIME_SEGMENTS  = 3,    ///< time segments (hour/day/week/etc) desc, then relevance desc
      SPH_SORT_EXTENDED       = 4,    ///< SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
      SPH_SORT_EXPR           = 5,    ///< sort by expression

      SPH_SORT_TOTAL          = 6     ///< number of sorting orders
  };
  /// search query matching mode
  enum ESphMatchMode
  {
      SPH_MATCH_ALL       = 0,    ///< match all query words
      SPH_MATCH_ANY       = 1,    ///< match any query word
      SPH_MATCH_PHRASE    = 2,    ///< match this exact phrase
      SPH_MATCH_BOOLEAN   = 3,    ///< match this boolean query
      SPH_MATCH_EXTENDED  = 4,    ///< match this extended query
      SPH_MATCH_FULLSCAN  = 5,    ///< match all document IDs w/o fulltext query, apply filters
      SPH_MATCH_EXTENDED2 = 6,    ///< extended engine V2

      SPH_MATCH_TOTAL     = 7     ///< number of matching modes
  };
  /// search query relevance ranking mode
  enum ESphRankMode
  {
      SPH_RANK_PROXIMITY_BM25 = 0,    ///< default mode, phrase proximity major factor and BM25 minor one
      SPH_RANK_BM25           = 1,    ///< statistical mode, BM25 ranking only (faster but worse quality)
      SPH_RANK_NONE           = 2,    ///< no ranking, all matches get a weight of 1
      SPH_RANK_WORDCOUNT      = 3,    ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
      SPH_RANK_PROXIMITY      = 4,    ///< phrase proximity
      SPH_RANK_MATCHANY       = 5,    ///< emulate old match-any weighting
      SPH_RANK_FIELDMASK      = 6,    ///< sets bits where there were matches

      SPH_RANK_TOTAL          = 7,    ///< number of ranking modes
      SPH_RANK_DEFAULT        = SPH_RANK_PROXIMITY_BM25
  };
  /// search query grouping mode
  enum ESphGroupBy
  {
      SPH_GROUPBY_DAY     = 0,    ///< group by day
      SPH_GROUPBY_WEEK    = 1,    ///< group by week
      SPH_GROUPBY_MONTH   = 2,    ///< group by month
      SPH_GROUPBY_YEAR    = 3,    ///< group by year
      SPH_GROUPBY_ATTR    = 4     ///< group by attribute value
  };
  /// known attribute types
  enum
  {
      SPH_ATTR_NONE       = 0,            ///< not an attribute at all
      SPH_ATTR_INTEGER    = 1,            ///< plain integer attribute
      SPH_ATTR_TIMESTAMP  = 2,            ///< timestamp attribute
      SPH_ATTR_ORDINAL    = 3,            ///< ordinal string number (integer at search time, specially handled at indexing time)
      SPH_ATTR_BOOL       = 4,            ///< boolean bit field
      SPH_ATTR_FLOAT      = 5,
      SPH_ATTR_BIGINT     = 6,

      SPH_ATTR_MULTI      = 0x40000000UL  ///< attribute has multiple values (0 or more)
  };
  /// known answers
  enum
  {
      SEARCHD_OK      = 0,    ///< general success, command-specific reply follows
      SEARCHD_ERROR   = 1,    ///< general failure, error message follows
      SEARCHD_RETRY   = 2,    ///< temporary failure, error message follows, client should retry later
      SEARCHD_WARNING = 3     ///< general success, warning message and command-specific reply follow
  };
  171. //////////////////////////////////////////////////////////////////////////////
  /// set to 1 to have SPH_DEBUG() print its messages to stderr
  #define SPHINX_DEBUG_OUTPUT 0
  /// set to 1 to have the SPH_ENTER_* / SPH_*RET macros trace function entry and exit
  #define SPHINX_DEBUG_CALLS 0

  #include <stdarg.h>

  #if SPHINX_DEBUG_OUTPUT
  /// printf-style debug output: "SphinxSE: " prefix, the message, then a newline
  inline void SPH_DEBUG ( const char * format, ... )
  {
      va_list ap;
      va_start ( ap, format );
      fprintf ( stderr, "SphinxSE: " );
      vfprintf ( stderr, format, ap );
      fprintf ( stderr, "\n" );
      va_end ( ap );
  }
  #else
  /// debug output is compiled out
  inline void SPH_DEBUG ( const char *, ... ) {}
  #endif

  #if SPHINX_DEBUG_CALLS
  #define SPH_ENTER_FUNC() { SPH_DEBUG ( "enter %s", __FUNCTION__ ); }
  // %p with a void* cast: printing a pointer through %08x is a format/argument
  // mismatch and drops the upper half of the address on 64-bit targets
  #define SPH_ENTER_METHOD() { SPH_DEBUG ( "enter %s(this=%p)", __FUNCTION__, (void*)this ); }
  #define SPH_RET(_arg) { SPH_DEBUG ( "leave %s", __FUNCTION__ ); return _arg; }
  #define SPH_VOID_RET() { SPH_DEBUG ( "leave %s", __FUNCTION__ ); return; }
  #else
  #define SPH_ENTER_FUNC()
  #define SPH_ENTER_METHOD()
  #define SPH_RET(_arg) { return(_arg); }
  #define SPH_VOID_RET() { return; }
  #endif

  /// delete a single object (if any) and reset the pointer to NULL
  #define SafeDelete(_arg) { if ( _arg ) delete ( _arg ); (_arg) = NULL; }
  /// delete an array (if any) and reset the pointer to NULL
  #define SafeDeleteArray(_arg) { if ( _arg ) delete [] ( _arg ); (_arg) = NULL; }
  201. //////////////////////////////////////////////////////////////////////////////
  202. /// a structure that will be shared among all open Sphinx SE handlers
  203. struct CSphSEShare
  204. {
  205. pthread_mutex_t m_tMutex;
  206. THR_LOCK m_tLock;
  207. char * m_sTable;
  208. char * m_sScheme;
  209. char * m_sHost; ///< points into m_sScheme buffer, DO NOT FREE EXPLICITLY
  210. char * m_sSocket; ///< points into m_sScheme buffer, DO NOT FREE EXPLICITLY
  211. char * m_sIndex; ///< points into m_sScheme buffer, DO NOT FREE EXPLICITLY
  212. ushort m_iPort;
  213. uint m_iTableNameLen;
  214. uint m_iUseCount;
  215. CHARSET_INFO * m_pTableQueryCharset;
  216. int m_iTableFields;
  217. char ** m_sTableField;
  218. enum_field_types * m_eTableFieldType;
  219. CSphSEShare ()
  220. : m_sTable ( NULL )
  221. , m_sScheme ( NULL )
  222. , m_sHost ( NULL )
  223. , m_sSocket ( NULL )
  224. , m_sIndex ( NULL )
  225. , m_iPort ( 0 )
  226. , m_iTableNameLen ( 0 )
  227. , m_iUseCount ( 1 )
  228. , m_pTableQueryCharset ( NULL )
  229. , m_iTableFields ( 0 )
  230. , m_sTableField ( NULL )
  231. , m_eTableFieldType ( NULL )
  232. {
  233. thr_lock_init ( &m_tLock );
  234. pthread_mutex_init ( &m_tMutex, MY_MUTEX_INIT_FAST );
  235. }
  236. ~CSphSEShare ()
  237. {
  238. pthread_mutex_destroy ( &m_tMutex );
  239. thr_lock_delete ( &m_tLock );
  240. SafeDeleteArray ( m_sTable );
  241. SafeDeleteArray ( m_sScheme );
  242. ResetTable ();
  243. }
  244. void ResetTable ()
  245. {
  246. for ( int i=0; i<m_iTableFields; i++ )
  247. SafeDeleteArray ( m_sTableField[i] );
  248. SafeDeleteArray ( m_sTableField );
  249. SafeDeleteArray ( m_eTableFieldType );
  250. }
  251. };
  252. /// schema attribute
  253. struct CSphSEAttr
  254. {
  255. char * m_sName; ///< attribute name (received from Sphinx)
  256. uint32 m_uType; ///< attribute type (received from Sphinx)
  257. int m_iField; ///< field index in current table (-1 if none)
  258. CSphSEAttr()
  259. : m_sName ( NULL )
  260. , m_uType ( SPH_ATTR_NONE )
  261. , m_iField ( -1 )
  262. {}
  263. ~CSphSEAttr ()
  264. {
  265. SafeDeleteArray ( m_sName );
  266. }
  267. };
  /// word stats
  struct CSphSEWordStats
  {
      char *  m_sWord;    ///< the word itself, owned
      int     m_iDocs;
      int     m_iHits;

      /// start out with no word and zeroed counters
      CSphSEWordStats ()
          : m_sWord ( NULL )
          , m_iDocs ( 0 )
          , m_iHits ( 0 )
      {
      }

      /// release the word buffer
      ~CSphSEWordStats ()
      {
          delete [] m_sWord;
          m_sWord = NULL;
      }
  };
  284. /// request stats
  285. struct CSphSEStats
  286. {
  287. public:
  288. int m_iMatchesTotal;
  289. int m_iMatchesFound;
  290. int m_iQueryMsec;
  291. int m_iWords;
  292. CSphSEWordStats * m_dWords;
  293. bool m_bLastError;
  294. char m_sLastMessage[1024];
  295. CSphSEStats()
  296. : m_dWords ( NULL )
  297. {
  298. Reset ();
  299. }
  300. void Reset ()
  301. {
  302. m_iMatchesTotal = 0;
  303. m_iMatchesFound = 0;
  304. m_iQueryMsec = 0;
  305. m_iWords = 0;
  306. SafeDeleteArray ( m_dWords );
  307. m_bLastError = false;
  308. m_sLastMessage[0] = '\0';
  309. }
  310. ~CSphSEStats()
  311. {
  312. Reset ();
  313. }
  314. };
  315. /// thread local storage
  316. struct CSphSEThreadData
  317. {
  318. static const int MAX_QUERY_LEN = 262144; // 256k should be enough, right?
  319. bool m_bStats;
  320. CSphSEStats m_tStats;
  321. bool m_bQuery;
  322. char m_sQuery[MAX_QUERY_LEN];
  323. CHARSET_INFO * m_pQueryCharset;
  324. CSphSEThreadData ()
  325. : m_bStats ( false )
  326. , m_bQuery ( false )
  327. , m_pQueryCharset ( NULL )
  328. {}
  329. };
  /// filter types
  enum ESphFilter
  {
      SPH_FILTER_VALUES       = 0,    ///< filter by integer values set
      SPH_FILTER_RANGE        = 1,    ///< filter by integer range
      SPH_FILTER_FLOATRANGE   = 2     ///< filter by float range
  };
  337. /// search query filter
  338. struct CSphSEFilter
  339. {
  340. public:
  341. ESphFilter m_eType;
  342. char * m_sAttrName;
  343. longlong m_uMinValue;
  344. longlong m_uMaxValue;
  345. float m_fMinValue;
  346. float m_fMaxValue;
  347. int m_iValues;
  348. longlong * m_pValues;
  349. int m_bExclude;
  350. public:
  351. CSphSEFilter ()
  352. : m_eType ( SPH_FILTER_VALUES )
  353. , m_sAttrName ( NULL )
  354. , m_uMinValue ( 0 )
  355. , m_uMaxValue ( UINT_MAX )
  356. , m_fMinValue ( 0.0f )
  357. , m_fMaxValue ( 0.0f )
  358. , m_iValues ( 0 )
  359. , m_pValues ( NULL )
  360. , m_bExclude ( 0 )
  361. {
  362. }
  363. ~CSphSEFilter ()
  364. {
  365. SafeDeleteArray ( m_pValues );
  366. }
  367. };
  368. /// float vs dword conversion
  369. inline uint32 sphF2DW ( float f ) { union { float f; uint32 d; } u; u.f = f; return u.d; }
  370. /// dword vs float conversion
  371. inline float sphDW2F ( uint32 d ) { union { float f; uint32 d; } u; u.d = d; return u.f; }
  372. /// client-side search query
  373. struct CSphSEQuery
  374. {
  375. public:
  376. const char * m_sHost;
  377. int m_iPort;
  378. private:
  379. char * m_sQueryBuffer;
  380. const char * m_sIndex;
  381. int m_iOffset;
  382. int m_iLimit;
  383. bool m_bQuery;
  384. char * m_sQuery;
  385. uint32 * m_pWeights;
  386. int m_iWeights;
  387. ESphMatchMode m_eMode;
  388. ESphRankMode m_eRanker;
  389. ESphSortOrder m_eSort;
  390. char * m_sSortBy;
  391. int m_iMaxMatches;
  392. int m_iMaxQueryTime;
  393. uint32 m_iMinID;
  394. uint32 m_iMaxID;
  395. int m_iFilters;
  396. CSphSEFilter m_dFilters[SPHINXSE_MAX_FILTERS];
  397. ESphGroupBy m_eGroupFunc;
  398. char * m_sGroupBy;
  399. char * m_sGroupSortBy;
  400. int m_iCutoff;
  401. int m_iRetryCount;
  402. int m_iRetryDelay;
  403. char * m_sGroupDistinct; ///< points to query buffer; do NOT delete
  404. int m_iIndexWeights;
  405. char * m_sIndexWeight[SPHINXSE_MAX_FILTERS]; ///< points to query buffer; do NOT delete
  406. int m_iIndexWeight[SPHINXSE_MAX_FILTERS];
  407. int m_iFieldWeights;
  408. char * m_sFieldWeight[SPHINXSE_MAX_FILTERS]; ///< points to query buffer; do NOT delete
  409. int m_iFieldWeight[SPHINXSE_MAX_FILTERS];
  410. bool m_bGeoAnchor;
  411. char * m_sGeoLatAttr;
  412. char * m_sGeoLongAttr;
  413. float m_fGeoLatitude;
  414. float m_fGeoLongitude;
  415. char * m_sComment;
  416. struct Override_t
  417. {
  418. union Value_t
  419. {
  420. uint32 m_uValue;
  421. longlong m_iValue64;
  422. float m_fValue;
  423. };
  424. char * m_sName; ///< points to query buffer
  425. int m_iType;
  426. Dynamic_array<ulonglong> m_dIds;
  427. Dynamic_array<Value_t> m_dValues;
  428. };
  429. Dynamic_array<Override_t *> m_dOverrides;
  430. public:
  431. char m_sParseError[256];
  432. public:
  433. CSphSEQuery ( const char * sQuery, int iLength, const char * sIndex );
  434. ~CSphSEQuery ();
  435. bool Parse ();
  436. int BuildRequest ( char ** ppBuffer );
  437. protected:
  438. char * m_pBuf;
  439. char * m_pCur;
  440. int m_iBufLeft;
  441. bool m_bBufOverrun;
  442. template < typename T > int ParseArray ( T ** ppValues, const char * sValue );
  443. bool ParseField ( char * sField );
  444. void SendBytes ( const void * pBytes, int iBytes );
  445. void SendWord ( short int v ) { v = ntohs(v); SendBytes ( &v, sizeof(short int) ); }
  446. void SendInt ( int v ) { v = ntohl(v); SendBytes ( &v, sizeof(int) ); }
  447. void SendDword ( uint v ) { v = ntohl(v) ;SendBytes ( &v, sizeof(uint) ); }
  448. void SendUint64 ( ulonglong v ) { SendDword ( uint(v>>32) ); SendDword ( uint(v&0xFFFFFFFFUL) ); }
  449. void SendString ( const char * v ) { int iLen = strlen(v); SendDword(iLen); SendBytes ( v, iLen ); }
  450. void SendFloat ( float v ) { SendDword ( sphF2DW(v) ); }
  451. };
  452. template int CSphSEQuery::ParseArray<uint32> ( uint32 **, const char * );
  453. template int CSphSEQuery::ParseArray<longlong> ( longlong **, const char * );
  454. //////////////////////////////////////////////////////////////////////////////
  455. #if MYSQL_VERSION_ID>50100
  456. #if MYSQL_VERSION_ID<50114
  457. #error Sphinx SE requires MySQL 5.1.14 or higher if compiling for 5.1.x series!
  458. #endif
  459. static handler * sphinx_create_handler ( handlerton * hton, TABLE_SHARE * table, MEM_ROOT * mem_root );
  460. static int sphinx_init_func ( void * p );
  461. static int sphinx_close_connection ( handlerton * hton, THD * thd );
  462. static int sphinx_panic ( handlerton * hton, enum ha_panic_function flag );
  463. static bool sphinx_show_status ( handlerton * hton, THD * thd, stat_print_fn * stat_print, enum ha_stat_type stat_type );
  464. #else
  465. static bool sphinx_init_func_for_handlerton ();
  466. static int sphinx_close_connection ( THD * thd );
  467. bool sphinx_show_status ( THD * thd );
  468. #endif // >50100
  469. //////////////////////////////////////////////////////////////////////////////
  470. static const char sphinx_hton_name[] = "SPHINX";
  471. static const char sphinx_hton_comment[] = "Sphinx storage engine " SPHINX_VERSION;
  472. #if MYSQL_VERSION_ID<50100
  473. handlerton sphinx_hton =
  474. {
  475. #ifdef MYSQL_HANDLERTON_INTERFACE_VERSION
  476. MYSQL_HANDLERTON_INTERFACE_VERSION,
  477. #endif
  478. sphinx_hton_name,
  479. SHOW_OPTION_YES,
  480. sphinx_hton_comment,
  481. DB_TYPE_SPHINX_DB,
  482. sphinx_init_func_for_handlerton,
  483. 0, // slot
  484. 0, // savepoint size
  485. sphinx_close_connection, // close_connection
  486. NULL, // savepoint
  487. NULL, // rollback to savepoint
  488. NULL, // release savepoint
  489. NULL, // commit
  490. NULL, // rollback
  491. NULL, // prepare
  492. NULL, // recover
  493. NULL, // commit_by_xid
  494. NULL, // rollback_by_xid
  495. NULL, // create_cursor_read_view
  496. NULL, // set_cursor_read_view
  497. NULL, // close_cursor_read_view
  498. HTON_CAN_RECREATE
  499. };
  500. #else
  501. static handlerton * sphinx_hton_ptr = NULL;
  502. #endif
  503. //////////////////////////////////////////////////////////////////////////////
  504. // variables for Sphinx shared methods
  505. pthread_mutex_t sphinx_mutex; // mutex to init the hash
  506. static int sphinx_init = 0; // flag whether the hash was initialized
  507. static HASH sphinx_open_tables; // hash used to track open tables
  508. //////////////////////////////////////////////////////////////////////////////
  509. // INITIALIZATION AND SHUTDOWN
  510. //////////////////////////////////////////////////////////////////////////////
  511. // hashing function
  512. #if MYSQL_VERSION_ID>=50120
  513. typedef size_t GetKeyLength_t;
  514. #else
  515. typedef uint GetKeyLength_t;
  516. #endif
  517. static byte * sphinx_get_key ( const byte * pSharePtr, GetKeyLength_t * pLength, my_bool )
  518. {
  519. CSphSEShare * pShare = (CSphSEShare *) pSharePtr;
  520. *pLength = (size_t) pShare->m_iTableNameLen;
  521. return (byte*) pShare->m_sTable;
  522. }
  523. #if MYSQL_VERSION_ID<50100
  524. static int sphinx_init_func ( void * ) // to avoid unused arg warning
  525. #else
  526. static int sphinx_init_func ( void * p )
  527. #endif
  528. {
  529. SPH_ENTER_FUNC();
  530. if ( !sphinx_init )
  531. {
  532. sphinx_init = 1;
  533. VOID ( pthread_mutex_init ( &sphinx_mutex, MY_MUTEX_INIT_FAST ) );
  534. hash_init ( &sphinx_open_tables, system_charset_info, 32, 0, 0,
  535. sphinx_get_key, 0, 0 );
  536. #if MYSQL_VERSION_ID > 50100
  537. handlerton * hton = (handlerton*) p;
  538. hton->state = SHOW_OPTION_YES;
  539. hton->db_type = DB_TYPE_AUTOASSIGN;
  540. hton->create = sphinx_create_handler;
  541. hton->close_connection = sphinx_close_connection;
  542. hton->show_status = sphinx_show_status;
  543. hton->panic = sphinx_panic;
  544. hton->flags = HTON_CAN_RECREATE;
  545. sphinx_hton_ptr = hton;
  546. #endif
  547. }
  548. SPH_RET(0);
  549. }
  550. #if MYSQL_VERSION_ID<50100
  551. static bool sphinx_init_func_for_handlerton ()
  552. {
  553. return sphinx_init_func ( &sphinx_hton );
  554. }
  555. #endif
  556. #if MYSQL_VERSION_ID>50100
  557. static int sphinx_close_connection ( handlerton * hton, THD * thd )
  558. {
  559. // deallocate common handler data
  560. SPH_ENTER_FUNC();
  561. void ** tmp = thd_ha_data ( thd, hton );
  562. CSphSEThreadData * pTls = (CSphSEThreadData*) (*tmp);
  563. SafeDelete ( pTls );
  564. *tmp = NULL;
  565. SPH_RET(0);
  566. }
  567. static int sphinx_done_func ( void * )
  568. {
  569. SPH_ENTER_FUNC();
  570. int error = 0;
  571. if ( sphinx_init )
  572. {
  573. sphinx_init = 0;
  574. if ( sphinx_open_tables.records )
  575. error = 1;
  576. hash_free ( &sphinx_open_tables );
  577. pthread_mutex_destroy ( &sphinx_mutex );
  578. }
  579. SPH_RET(error);
  580. }
  581. static int sphinx_panic ( handlerton * hton, enum ha_panic_function )
  582. {
  583. return sphinx_done_func ( hton );
  584. }
  585. #else
  586. static int sphinx_close_connection ( THD * thd )
  587. {
  588. // deallocate common handler data
  589. SPH_ENTER_FUNC();
  590. CSphSEThreadData * pTls = (CSphSEThreadData*) thd->ha_data[sphinx_hton.slot];
  591. SafeDelete ( pTls );
  592. thd->ha_data[sphinx_hton.slot] = NULL;
  593. SPH_RET(0);
  594. }
  595. #endif // >50100
  596. //////////////////////////////////////////////////////////////////////////////
  597. // SHOW STATUS
  598. //////////////////////////////////////////////////////////////////////////////
  599. #if MYSQL_VERSION_ID>50100
  600. static bool sphinx_show_status ( handlerton * hton, THD * thd, stat_print_fn * stat_print,
  601. enum ha_stat_type )
  602. #else
  603. bool sphinx_show_status ( THD * thd )
  604. #endif
  605. {
  606. SPH_ENTER_FUNC();
  607. #if MYSQL_VERSION_ID<50100
  608. Protocol * protocol = thd->protocol;
  609. List<Item> field_list;
  610. #endif
  611. char buf1[IO_SIZE];
  612. uint buf1len;
  613. char buf2[IO_SIZE];
  614. uint buf2len= 0;
  615. String words;
  616. buf1[0] = '\0';
  617. buf2[0] = '\0';
  618. #if MYSQL_VERSION_ID>50100
  619. CSphSEThreadData * pTls = (CSphSEThreadData*) ( *thd_ha_data ( thd, hton ) );
  620. #else
  621. if ( have_sphinx_db!=SHOW_OPTION_YES )
  622. {
  623. my_message ( ER_NOT_SUPPORTED_YET,
  624. "failed to call SHOW SPHINX STATUS: --skip-sphinx was specified",
  625. MYF(0) );
  626. SPH_RET(TRUE);
  627. }
  628. CSphSEThreadData * pTls = (CSphSEThreadData*) thd->ha_data[sphinx_hton.slot];
  629. #endif
  630. if ( pTls && pTls->m_bStats )
  631. {
  632. const CSphSEStats * pStats = &pTls->m_tStats;
  633. buf1len = my_snprintf ( buf1, sizeof(buf1),
  634. "total: %d, total found: %d, time: %d, words: %d",
  635. pStats->m_iMatchesTotal, pStats->m_iMatchesFound, pStats->m_iQueryMsec, pStats->m_iWords );
  636. #if MYSQL_VERSION_ID>50100
  637. stat_print ( thd, sphinx_hton_name, strlen(sphinx_hton_name),
  638. STRING_WITH_LEN("stats"), buf1, buf1len );
  639. #else
  640. field_list.push_back ( new Item_empty_string ( "Type",10 ) );
  641. field_list.push_back ( new Item_empty_string ( "Name",FN_REFLEN ) );
  642. field_list.push_back ( new Item_empty_string ( "Status",10 ) );
  643. if ( protocol->send_fields ( &field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF ) )
  644. SPH_RET(TRUE);
  645. protocol->prepare_for_resend ();
  646. protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info );
  647. protocol->store ( STRING_WITH_LEN("stats"), system_charset_info );
  648. protocol->store ( buf1, buf1len, system_charset_info );
  649. if ( protocol->write() )
  650. SPH_RET(TRUE);
  651. #endif
  652. if ( pStats->m_iWords )
  653. {
  654. for ( int i=0; i<pStats->m_iWords; i++ )
  655. {
  656. CSphSEWordStats & tWord = pStats->m_dWords[i];
  657. buf2len = my_snprintf ( buf2, sizeof(buf2), "%s%s:%d:%d ",
  658. buf2, tWord.m_sWord, tWord.m_iDocs, tWord.m_iHits );
  659. }
  660. // convert it if we can
  661. const char * sWord = buf2;
  662. int iWord = buf2len;
  663. String sBuf3;
  664. if ( pTls->m_pQueryCharset )
  665. {
  666. uint iErrors;
  667. sBuf3.copy ( buf2, buf2len, pTls->m_pQueryCharset, system_charset_info, &iErrors );
  668. sWord = sBuf3.c_ptr();
  669. iWord = sBuf3.length();
  670. }
  671. #if MYSQL_VERSION_ID>50100
  672. stat_print ( thd, sphinx_hton_name, strlen(sphinx_hton_name),
  673. STRING_WITH_LEN("words"), sWord, iWord );
  674. #else
  675. protocol->prepare_for_resend ();
  676. protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info );
  677. protocol->store ( STRING_WITH_LEN("words"), system_charset_info );
  678. protocol->store ( sWord, iWord, system_charset_info );
  679. if ( protocol->write() )
  680. SPH_RET(TRUE);
  681. #endif
  682. }
  683. // send last error or warning
  684. if ( pStats->m_sLastMessage && pStats->m_sLastMessage[0] )
  685. {
  686. const char * sMessageType = pStats->m_bLastError ? "error" : "warning";
  687. #if MYSQL_VERSION_ID>50100
  688. stat_print ( thd, sphinx_hton_name, strlen(sphinx_hton_name),
  689. sMessageType, strlen(sMessageType), pStats->m_sLastMessage, strlen(pStats->m_sLastMessage) );
  690. #else
  691. protocol->prepare_for_resend ();
  692. protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info );
  693. protocol->store ( sMessageType, strlen(sMessageType), system_charset_info );
  694. protocol->store ( pStats->m_sLastMessage, strlen(pStats->m_sLastMessage), system_charset_info );
  695. if ( protocol->write() )
  696. SPH_RET(TRUE);
  697. #endif
  698. }
  699. } else
  700. {
  701. #if MYSQL_VERSION_ID < 50100
  702. field_list.push_back ( new Item_empty_string ( "Type", 10 ) );
  703. field_list.push_back ( new Item_empty_string ( "Name", FN_REFLEN ) );
  704. field_list.push_back ( new Item_empty_string ( "Status", 10 ) );
  705. if ( protocol->send_fields ( &field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF ) )
  706. SPH_RET(TRUE);
  707. protocol->prepare_for_resend ();
  708. protocol->store ( STRING_WITH_LEN("SPHINX"), system_charset_info );
  709. protocol->store ( STRING_WITH_LEN("stats"), system_charset_info );
  710. protocol->store ( STRING_WITH_LEN("no query has been executed yet"), system_charset_info );
  711. if ( protocol->write() )
  712. SPH_RET(TRUE);
  713. #endif
  714. }
  715. #if MYSQL_VERSION_ID < 50100
  716. send_eof(thd);
  717. #endif
  718. SPH_RET(FALSE);
  719. }
  720. //////////////////////////////////////////////////////////////////////////////
  721. // HELPERS
  722. //////////////////////////////////////////////////////////////////////////////
  /// Duplicate a string into a new[]-allocated, NUL-terminated buffer.
  /// @param sSrc     source string; NULL yields NULL
  /// @param iLen     number of bytes to copy; negative means strlen(sSrc)
  /// @return         the copy, to be released with delete []
  static char * sphDup ( const char * sSrc, int iLen=-1 )
  {
      if ( sSrc==NULL )
          return NULL;

      const int iBytes = ( iLen>=0 ) ? iLen : (int) strlen ( sSrc );

      char * sCopy = new char [ iBytes+1 ];
      memcpy ( sCopy, sSrc, iBytes );
      sCopy[iBytes] = '\0';
      return sCopy;
  }
  /// Write an "internal error" line to stderr: a local-time timestamp, the
  /// "SphinxSE: internal error: " tag, the printf-style message, a newline.
  static void sphLogError ( const char * sFmt, ... )
  {
      // gather the timestamp parts in a platform-specific way
      int iYear, iMonth, iDay, iHour, iMinute, iSecond;

  #ifdef __WIN__
      SYSTEMTIME tNow;
      GetLocalTime ( &tNow );

      iYear = (int)tNow.wYear % 100;
      iMonth = (int)tNow.wMonth;
      iDay = (int)tNow.wDay;
      iHour = (int)tNow.wHour;
      iMinute = (int)tNow.wMinute;
      iSecond = (int)tNow.wSecond;
  #else
      // Unix version
      time_t tStamp;
      time ( &tStamp );

      struct tm * pParsed;
  #ifdef HAVE_LOCALTIME_R
      struct tm tParsed;
      localtime_r ( &tStamp, &tParsed );
      pParsed = &tParsed;
  #else
      pParsed = localtime ( &tStamp );
  #endif // HAVE_LOCALTIME_R

      iYear = pParsed->tm_year % 100;
      iMonth = pParsed->tm_mon + 1; // tm_mon is zero-based
      iDay = pParsed->tm_mday;
      iHour = pParsed->tm_hour;
      iMinute = pParsed->tm_min;
      iSecond = pParsed->tm_sec;
  #endif // __WIN__

      // emit timestamp
      fprintf ( stderr, "%02d%02d%02d %2d:%02d:%02d SphinxSE: internal error: ",
          iYear, iMonth, iDay, iHour, iMinute, iSecond );

      // emit message
      va_list ap;
      va_start ( ap, sFmt );
      vfprintf ( stderr, sFmt, ap );
      va_end ( ap );

      // emit newline
      fprintf ( stderr, "\n" );
  }
  767. // the following scheme variants are recognized
  768. //
  769. // sphinx://host/index
  770. // sphinx://host:port/index
  771. // unix://unix/domain/socket:index
  772. // unix://unix/domain/socket
  773. static bool ParseUrl ( CSphSEShare * share, TABLE * table, bool bCreate )
  774. {
  775. SPH_ENTER_FUNC();
  776. if ( share )
  777. {
  778. // check incoming stuff
  779. if ( !table )
  780. {
  781. sphLogError ( "table==NULL in ParseUrl()" );
  782. return false;
  783. }
  784. if ( !table->s )
  785. {
  786. sphLogError ( "(table->s)==NULL in ParseUrl()" );
  787. return false;
  788. }
  789. // free old stuff
  790. share->ResetTable ();
  791. // fill new stuff
  792. share->m_iTableFields = table->s->fields;
  793. if ( share->m_iTableFields )
  794. {
  795. share->m_sTableField = new char * [ share->m_iTableFields ];
  796. share->m_eTableFieldType = new enum_field_types [ share->m_iTableFields ];
  797. for ( int i=0; i<share->m_iTableFields; i++ )
  798. {
  799. share->m_sTableField[i] = sphDup ( table->field[i]->field_name );
  800. share->m_eTableFieldType[i] = table->field[i]->type();
  801. }
  802. }
  803. }
  804. char * sScheme = NULL;
  805. char * sHost = (char*) SPHINXSE_DEFAULT_HOST;
  806. char * sIndex = (char*) SPHINXSE_DEFAULT_INDEX;
  807. int iPort = SPHINXSE_DEFAULT_PORT;
  808. bool bOk = true;
  809. while ( table->s->connect_string.length!=0 )
  810. {
  811. bOk = false;
  812. sScheme = sphDup ( table->s->connect_string.str, table->s->connect_string.length );
  813. sHost = strstr ( sScheme, "://" );
  814. if ( !sHost )
  815. break;
  816. sHost[0] = '\0';
  817. sHost += 2;
  818. if ( !strcmp ( sScheme, "unix" ) )
  819. {
  820. // unix-domain socket
  821. iPort = 0;
  822. if (!( sIndex = strrchr ( sHost, ':' ) ))
  823. sIndex = (char*) SPHINXSE_DEFAULT_INDEX;
  824. else
  825. {
  826. *sIndex++ = '\0';
  827. if ( !*sIndex )
  828. sIndex = (char*) SPHINXSE_DEFAULT_INDEX;
  829. }
  830. bOk = true;
  831. break;
  832. }
  833. if( strcmp ( sScheme, "sphinx" )!=0 && strcmp ( sScheme, "inet" )!=0 )
  834. break;
  835. // tcp
  836. sHost++;
  837. char * sPort = strchr ( sHost, ':' );
  838. if ( sPort )
  839. {
  840. *sPort++ = '\0';
  841. if ( *sPort )
  842. {
  843. sIndex = strchr ( sPort, '/' );
  844. if ( sIndex )
  845. *sIndex++ = '\0';
  846. else
  847. sIndex = (char*) SPHINXSE_DEFAULT_INDEX;
  848. iPort = atoi(sPort);
  849. if ( !iPort )
  850. iPort = SPHINXSE_DEFAULT_PORT;
  851. }
  852. } else
  853. {
  854. sIndex = strchr ( sHost, '/' );
  855. if ( sIndex )
  856. *sIndex++ = '\0';
  857. else
  858. sIndex = (char*) SPHINXSE_DEFAULT_INDEX;
  859. }
  860. bOk = true;
  861. break;
  862. }
  863. if ( !bOk )
  864. {
  865. my_error ( bCreate ? ER_FOREIGN_DATA_STRING_INVALID_CANT_CREATE : ER_FOREIGN_DATA_STRING_INVALID,
  866. MYF(0), table->s->connect_string );
  867. } else
  868. {
  869. if ( share )
  870. {
  871. SafeDeleteArray ( share->m_sScheme );
  872. share->m_sScheme = sScheme;
  873. share->m_sHost = sHost;
  874. share->m_sIndex = sIndex;
  875. share->m_iPort = (ushort)iPort;
  876. }
  877. }
  878. if ( !bOk && !share )
  879. SafeDeleteArray ( sScheme );
  880. SPH_RET(bOk);
  881. }
  882. // Example of simple lock controls. The "share" it creates is structure we will
  883. // pass to each sphinx handler. Do you have to have one of these? Well, you have
  884. // pieces that are used for locking, and they are needed to function.
  885. static CSphSEShare * get_share ( const char * table_name, TABLE * table )
  886. {
  887. SPH_ENTER_FUNC();
  888. pthread_mutex_lock ( &sphinx_mutex );
  889. CSphSEShare * pShare = NULL;
  890. for ( ;; )
  891. {
  892. // check if we already have this share
  893. #if MYSQL_VERSION_ID>=50120
  894. pShare = (CSphSEShare*) hash_search ( &sphinx_open_tables, (const uchar *) table_name, strlen(table_name) );
  895. #else
  896. #ifdef __WIN__
  897. pShare = (CSphSEShare*) hash_search ( &sphinx_open_tables, (const byte *) table_name, strlen(table_name) );
  898. #else
  899. pShare = (CSphSEShare*) hash_search ( &sphinx_open_tables, table_name, strlen(table_name) );
  900. #endif // win
  901. #endif // pre-5.1.20
  902. if ( pShare )
  903. {
  904. pShare->m_iUseCount++;
  905. break;
  906. }
  907. // try to allocate new share
  908. pShare = new CSphSEShare ();
  909. if ( !pShare )
  910. break;
  911. // try to setup it
  912. pShare->m_pTableQueryCharset = table->field[2]->charset();
  913. if ( !ParseUrl ( pShare, table, false ) )
  914. {
  915. SafeDelete ( pShare );
  916. break;
  917. }
  918. // try to hash it
  919. pShare->m_iTableNameLen = strlen(table_name);
  920. pShare->m_sTable = sphDup ( table_name );
  921. if ( my_hash_insert ( &sphinx_open_tables, (const byte *)pShare ) )
  922. {
  923. SafeDelete ( pShare );
  924. break;
  925. }
  926. // all seems fine
  927. break;
  928. }
  929. pthread_mutex_unlock ( &sphinx_mutex );
  930. SPH_RET(pShare);
  931. }
  932. // Free lock controls. We call this whenever we close a table. If the table had
  933. // the last reference to the share then we free memory associated with it.
  934. static int free_share ( CSphSEShare * pShare )
  935. {
  936. SPH_ENTER_FUNC();
  937. pthread_mutex_lock ( &sphinx_mutex );
  938. if ( !--pShare->m_iUseCount )
  939. {
  940. hash_delete ( &sphinx_open_tables, (byte *)pShare );
  941. SafeDelete ( pShare );
  942. }
  943. pthread_mutex_unlock ( &sphinx_mutex );
  944. SPH_RET(0);
  945. }
  #if MYSQL_VERSION_ID>50100
  /// Handler factory: constructs an ha_sphinx instance using placement new on mem_root.
  static handler * sphinx_create_handler ( handlerton * hton, TABLE_SHARE * table, MEM_ROOT * mem_root )
  {
      handler * pHandler = new ( mem_root ) ha_sphinx ( hton, table );
      return pHandler;
  }
  #endif
  952. //////////////////////////////////////////////////////////////////////////////
  953. // CLIENT-SIDE REQUEST STUFF
  954. //////////////////////////////////////////////////////////////////////////////
  955. CSphSEQuery::CSphSEQuery ( const char * sQuery, int iLength, const char * sIndex )
  956. : m_sHost ( "" )
  957. , m_iPort ( 0 )
  958. , m_sIndex ( sIndex ? sIndex : (char*) "*" )
  959. , m_iOffset ( 0 )
  960. , m_iLimit ( 20 )
  961. , m_bQuery ( false )
  962. , m_sQuery ( (char*) "" )
  963. , m_pWeights ( NULL )
  964. , m_iWeights ( 0 )
  965. , m_eMode ( SPH_MATCH_ALL )
  966. , m_eRanker ( SPH_RANK_PROXIMITY_BM25 )
  967. , m_eSort ( SPH_SORT_RELEVANCE )
  968. , m_sSortBy ( (char*) "" )
  969. , m_iMaxMatches ( 1000 )
  970. , m_iMaxQueryTime ( 0 )
  971. , m_iMinID ( 0 )
  972. , m_iMaxID ( 0 )
  973. , m_iFilters ( 0 )
  974. , m_eGroupFunc ( SPH_GROUPBY_DAY )
  975. , m_sGroupBy ( (char*) "" )
  976. , m_sGroupSortBy ( (char*) "@group desc" )
  977. , m_iCutoff ( 0 )
  978. , m_iRetryCount ( 0 )
  979. , m_iRetryDelay ( 0 )
  980. , m_sGroupDistinct ( (char*) "" )
  981. , m_iIndexWeights ( 0 )
  982. , m_iFieldWeights ( 0 )
  983. , m_bGeoAnchor ( false )
  984. , m_sGeoLatAttr ( (char*) "" )
  985. , m_sGeoLongAttr ( (char*) "" )
  986. , m_fGeoLatitude ( 0.0f )
  987. , m_fGeoLongitude ( 0.0f )
  988. , m_sComment ( (char*) "" )
  989. , m_pBuf ( NULL )
  990. , m_pCur ( NULL )
  991. , m_iBufLeft ( 0 )
  992. , m_bBufOverrun ( false )
  993. {
  994. m_sQueryBuffer = new char [ iLength+2 ];
  995. memcpy ( m_sQueryBuffer, sQuery, iLength );
  996. m_sQueryBuffer[iLength]= ';';
  997. m_sQueryBuffer[iLength+1]= '\0';
  998. }
  999. CSphSEQuery::~CSphSEQuery ()
  1000. {
  1001. SPH_ENTER_METHOD();
  1002. SafeDeleteArray ( m_sQueryBuffer );
  1003. SafeDeleteArray ( m_pWeights );
  1004. SafeDeleteArray ( m_pBuf );
  1005. for ( int i=0; i<m_dOverrides.elements(); i++ )
  1006. SafeDelete ( m_dOverrides.at(i) );
  1007. SPH_VOID_RET();
  1008. }
  1009. template < typename T >
  1010. int CSphSEQuery::ParseArray ( T ** ppValues, const char * sValue )
  1011. {
  1012. SPH_ENTER_METHOD();
  1013. assert ( ppValues );
  1014. assert ( !(*ppValues) );
  1015. const char * pValue;
  1016. bool bPrevDigit = false;
  1017. int iValues = 0;
  1018. // count the values
  1019. for ( pValue=sValue; *pValue; pValue++ )
  1020. {
  1021. bool bDigit = (*pValue)>='0' && (*pValue)<='9';
  1022. if ( bDigit && !bPrevDigit )
  1023. iValues++;
  1024. bPrevDigit = bDigit;
  1025. }
  1026. if ( !iValues )
  1027. SPH_RET(0);
  1028. // extract the values
  1029. T * pValues = new T [ iValues ];
  1030. *ppValues = pValues;
  1031. int iIndex = 0, iSign = 1;
  1032. T uValue = 0;
  1033. bPrevDigit = false;
  1034. for ( pValue=sValue ;; pValue++ )
  1035. {
  1036. bool bDigit = (*pValue)>='0' && (*pValue)<='9';
  1037. if ( bDigit )
  1038. {
  1039. if ( !bPrevDigit )
  1040. uValue = 0;
  1041. uValue = uValue*10 + ( (*pValue)-'0' );
  1042. }
  1043. else if ( bPrevDigit )
  1044. {
  1045. assert ( iIndex<iValues );
  1046. pValues [ iIndex++ ] = uValue * iSign;
  1047. iSign = 1;
  1048. }
  1049. else if ( *pValue=='-' )
  1050. iSign = -1;
  1051. bPrevDigit = bDigit;
  1052. if ( !*pValue )
  1053. break;
  1054. }
  1055. SPH_RET(iValues);
  1056. }
  /// Trim leading and trailing whitespace from a NUL-terminated string, in place.
  ///
  /// Trailing whitespace is cut by writing a NUL into the buffer; leading
  /// whitespace is skipped by returning a pointer further into it.
  ///
  /// @param s  mutable NUL-terminated string
  /// @return pointer to the first non-whitespace character within s
  static char * chop ( char * s )
  {
      // isspace() is undefined for negative arguments other than EOF, so each
      // byte is widened through unsigned char (matters for high-bit bytes on
      // platforms where plain char is signed)
      while ( *s && isspace ( (unsigned char)*s ) )
          s++;

      char * p = s + strlen(s);
      while ( p>s && isspace ( (unsigned char)p[-1] ) )
          p--;
      *p = '\0';

      return s;
  }
  /// Tell whether c may appear in an attribute name: an ASCII letter, an ASCII
  /// digit, or an underscore.
  static bool myisattr ( char c )
  {
      if ( c=='_' )
          return true;
      if ( c>='0' && c<='9' )
          return true;
      if ( c>='a' && c<='z' )
          return true;
      return ( c>='A' && c<='Z' );
  }
  1075. bool CSphSEQuery::ParseField ( char * sField )
  1076. {
  1077. SPH_ENTER_METHOD();
  1078. // look for option name/value separator
  1079. char * sValue = strchr ( sField, '=' );
  1080. if ( !sValue || sValue==sField || sValue[-1]=='\\' )
  1081. {
  1082. // by default let's assume it's just query
  1083. if ( sField[0] )
  1084. {
  1085. if ( m_bQuery )
  1086. {
  1087. snprintf ( m_sParseError, sizeof(m_sParseError), "search query already specified; '%s' is redundant", sField );
  1088. SPH_RET(false);
  1089. } else
  1090. {
  1091. m_sQuery = sField;
  1092. m_bQuery = true;
  1093. // unescape
  1094. char *s = sField, *d = sField;
  1095. while ( *s )
  1096. {
  1097. if ( *s!='\\' ) *d++ = *s;
  1098. s++;
  1099. }
  1100. *d = '\0';
  1101. }
  1102. }
  1103. SPH_RET(true);
  1104. }
  1105. // split
  1106. *sValue++ = '\0';
  1107. sValue = chop ( sValue );
  1108. int iValue = atoi ( sValue );
  1109. // handle options
  1110. char * sName = chop ( sField );
  1111. if ( !strcmp ( sName, "query" ) ) m_sQuery = sValue;
  1112. else if ( !strcmp ( sName, "host" ) ) m_sHost = sValue;
  1113. else if ( !strcmp ( sName, "port" ) ) m_iPort = iValue;
  1114. else if ( !strcmp ( sName, "index" ) ) m_sIndex = sValue;
  1115. else if ( !strcmp ( sName, "offset" ) ) m_iOffset = iValue;
  1116. else if ( !strcmp ( sName, "limit" ) ) m_iLimit = iValue;
  1117. else if ( !strcmp ( sName, "weights" ) ) m_iWeights = ParseArray<uint32> ( &m_pWeights, sValue );
  1118. else if ( !strcmp ( sName, "minid" ) ) m_iMinID = iValue;
  1119. else if ( !strcmp ( sName, "maxid" ) ) m_iMaxID = iValue;
  1120. else if ( !strcmp ( sName, "maxmatches" ) ) m_iMaxMatches = iValue;
  1121. else if ( !strcmp ( sName, "maxquerytime" ) ) m_iMaxQueryTime = iValue;
  1122. else if ( !strcmp ( sName, "groupsort" ) ) m_sGroupSortBy = sValue;
  1123. else if ( !strcmp ( sName, "distinct" ) ) m_sGroupDistinct = sValue;
  1124. else if ( !strcmp ( sName, "cutoff" ) ) m_iCutoff = iValue;
  1125. else if ( !strcmp ( sName, "comment" ) ) m_sComment = sValue;
  1126. else if ( !strcmp ( sName, "mode" ) )
  1127. {
  1128. m_eMode = SPH_MATCH_ALL;
  1129. if ( !strcmp ( sValue, "any") ) m_eMode = SPH_MATCH_ANY;
  1130. else if ( !strcmp ( sValue, "phrase" ) ) m_eMode = SPH_MATCH_PHRASE;
  1131. else if ( !strcmp ( sValue, "boolean") ) m_eMode = SPH_MATCH_BOOLEAN;
  1132. else if ( !strcmp ( sValue, "ext") ) m_eMode = SPH_MATCH_EXTENDED;
  1133. else if ( !strcmp ( sValue, "extended") ) m_eMode = SPH_MATCH_EXTENDED;
  1134. else if ( !strcmp ( sValue, "ext2") ) m_eMode = SPH_MATCH_EXTENDED2;
  1135. else if ( !strcmp ( sValue, "extended2") ) m_eMode = SPH_MATCH_EXTENDED2;
  1136. else if ( !strcmp ( sValue, "all") ) m_eMode = SPH_MATCH_ALL;
  1137. else if ( !strcmp ( sValue, "fullscan") ) m_eMode = SPH_MATCH_FULLSCAN;
  1138. else
  1139. {
  1140. snprintf ( m_sParseError, sizeof(m_sParseError), "unknown matching mode '%s'", sValue );
  1141. SPH_RET(false);
  1142. }
  1143. } else if ( !strcmp ( sName, "ranker" ) )
  1144. {
  1145. m_eRanker = SPH_RANK_PROXIMITY_BM25;
  1146. if ( !strcmp ( sValue, "proximity_bm25") ) m_eRanker = SPH_RANK_PROXIMITY_BM25;
  1147. else if ( !strcmp ( sValue, "bm25" ) ) m_eRanker = SPH_RANK_BM25;
  1148. else if ( !strcmp ( sValue, "none" ) ) m_eRanker = SPH_RANK_NONE;
  1149. else if ( !strcmp ( sValue, "wordcount" ) ) m_eRanker = SPH_RANK_WORDCOUNT;
  1150. else if ( !strcmp ( sValue, "proximity" ) ) m_eRanker = SPH_RANK_PROXIMITY;
  1151. else if ( !strcmp ( sValue, "matchany" ) ) m_eRanker = SPH_RANK_MATCHANY;
  1152. else if ( !strcmp ( sValue, "fieldmask" ) ) m_eRanker = SPH_RANK_FIELDMASK;
  1153. else
  1154. {
  1155. snprintf ( m_sParseError, sizeof(m_sParseError), "unknown ranking mode '%s'", sValue );
  1156. SPH_RET(false);
  1157. }
  1158. } else if ( !strcmp ( sName, "sort" ) )
  1159. {
  1160. static const struct
  1161. {
  1162. const char * m_sName;
  1163. ESphSortOrder m_eSort;
  1164. } dSortModes[] =
  1165. {
  1166. { "relevance", SPH_SORT_RELEVANCE },
  1167. { "attr_desc:", SPH_SORT_ATTR_DESC },
  1168. { "attr_asc:", SPH_SORT_ATTR_ASC },
  1169. { "time_segments:", SPH_SORT_TIME_SEGMENTS },
  1170. { "extended:", SPH_SORT_EXTENDED },
  1171. { "expr:", SPH_SORT_EXPR }
  1172. };
  1173. int i;
  1174. const int nModes = sizeof(dSortModes)/sizeof(dSortModes[0]);
  1175. for ( i=0; i<nModes; i++ )
  1176. if ( !strncmp ( sValue, dSortModes[i].m_sName, strlen(dSortModes[i].m_sName) ) )
  1177. {
  1178. m_eSort = dSortModes[i].m_eSort;
  1179. m_sSortBy = sValue + strlen(dSortModes[i].m_sName);
  1180. break;
  1181. }
  1182. if ( i==nModes )
  1183. {
  1184. snprintf ( m_sParseError, sizeof(m_sParseError), "unknown sorting mode '%s'", sValue );
  1185. SPH_RET(false);
  1186. }
  1187. } else if ( !strcmp ( sName, "groupby" ) )
  1188. {
  1189. static const struct
  1190. {
  1191. const char * m_sName;
  1192. ESphGroupBy m_eFunc;
  1193. } dGroupModes[] =
  1194. {
  1195. { "day:", SPH_GROUPBY_DAY },
  1196. { "week:", SPH_GROUPBY_WEEK },
  1197. { "month:", SPH_GROUPBY_MONTH },
  1198. { "year:", SPH_GROUPBY_YEAR },
  1199. { "attr:", SPH_GROUPBY_ATTR },
  1200. };
  1201. int i;
  1202. const int nModes = sizeof(dGroupModes)/sizeof(dGroupModes[0]);
  1203. for ( i=0; i<nModes; i++ )
  1204. if ( !strncmp ( sValue, dGroupModes[i].m_sName, strlen(dGroupModes[i].m_sName) ) )
  1205. {
  1206. m_eGroupFunc = dGroupModes[i].m_eFunc;
  1207. m_sGroupBy = sValue + strlen(dGroupModes[i].m_sName);
  1208. break;
  1209. }
  1210. if ( i==nModes )
  1211. {
  1212. snprintf ( m_sParseError, sizeof(m_sParseError), "unknown groupby mode '%s'", sValue );
  1213. SPH_RET(false);
  1214. }
  1215. } else if ( m_iFilters<SPHINXSE_MAX_FILTERS &&
  1216. ( !strcmp ( sName, "range" ) || !strcmp ( sName, "!range" ) || !strcmp ( sName, "floatrange" ) || !strcmp ( sName, "!floatrange" ) ) )
  1217. {
  1218. for ( ;; )
  1219. {
  1220. char * p = sName;
  1221. CSphSEFilter & tFilter = m_dFilters [ m_iFilters ];
  1222. tFilter.m_bExclude = ( *p=='!' ); if ( tFilter.m_bExclude ) p++;
  1223. tFilter.m_eType = ( *p=='f' ) ? SPH_FILTER_FLOATRANGE : SPH_FILTER_RANGE;
  1224. if (!( p = strchr ( sValue, ',' ) ))
  1225. break;
  1226. *p++ = '\0';
  1227. tFilter.m_sAttrName = chop ( sValue );
  1228. sValue = p;
  1229. if (!( p = strchr ( sValue, ',' ) ))
  1230. break;
  1231. *p++ = '\0';
  1232. if ( tFilter.m_eType==SPH_FILTER_RANGE )
  1233. {
  1234. tFilter.m_uMinValue = strtoll ( sValue, NULL, 0 );
  1235. tFilter.m_uMaxValue = strtoll ( p, NULL, 0 );
  1236. } else
  1237. {
  1238. tFilter.m_fMinValue = (float)atof(sValue);
  1239. tFilter.m_fMaxValue = (float)atof(p);
  1240. }
  1241. // all ok
  1242. m_iFilters++;
  1243. break;
  1244. }
  1245. } else if ( m_iFilters<SPHINXSE_MAX_FILTERS &&
  1246. ( !strcmp ( sName, "filter" ) || !strcmp ( sName, "!filter" ) ) )
  1247. {
  1248. for ( ;; )
  1249. {
  1250. CSphSEFilter & tFilter = m_dFilters [ m_iFilters ];
  1251. tFilter.m_eType = SPH_FILTER_VALUES;
  1252. tFilter.m_bExclude = ( strcmp ( sName, "!filter")==0 );
  1253. // get the attr name
  1254. while ( (*sValue) && !myisattr(*sValue) )
  1255. sValue++;
  1256. if ( !*sValue )
  1257. break;
  1258. tFilter.m_sAttrName = sValue;
  1259. while ( (*sValue) && myisattr(*sValue) )
  1260. sValue++;
  1261. if ( !*sValue )
  1262. break;
  1263. *sValue++ = '\0';
  1264. // get the values
  1265. tFilter.m_iValues = ParseArray<longlong> ( &tFilter.m_pValues, sValue );
  1266. if ( !tFilter.m_iValues )
  1267. {
  1268. assert ( !tFilter.m_pValues );
  1269. break;
  1270. }
  1271. // all ok
  1272. m_iFilters++;
  1273. break;
  1274. }
  1275. } else if ( !strcmp ( sName, "indexweights" ) || !strcmp ( sName, "fieldweights" ) )
  1276. {
  1277. bool bIndex = !strcmp ( sName, "indexweights" );
  1278. int * pCount = bIndex ? &m_iIndexWeights : &m_iFieldWeights;
  1279. char ** pNames = bIndex ? &m_sIndexWeight[0] : &m_sFieldWeight[0];
  1280. int * pWeights = bIndex ? &m_iIndexWeight[0] : &m_iFieldWeight[0];
  1281. *pCount = 0;
  1282. char * p = sValue;
  1283. while ( *p && *pCount<SPHINXSE_MAX_FILTERS )
  1284. {
  1285. // extract attr name
  1286. if ( !myisattr(*p) )
  1287. {
  1288. snprintf ( m_sParseError, sizeof(m_sParseError), "%s: index name expected near '%s'", sName, p );
  1289. SPH_RET(false);
  1290. }
  1291. pNames[*pCount] = p;
  1292. while ( myisattr(*p) ) p++;
  1293. if ( *p!=',' )
  1294. {
  1295. snprintf ( m_sParseError, sizeof(m_sParseError), "%s: comma expected near '%s'", sName, p );
  1296. SPH_RET(false);
  1297. }
  1298. *p++ = '\0';
  1299. // extract attr value
  1300. char * sVal = p;
  1301. while ( isdigit(*p) ) p++;
  1302. if ( p==sVal )
  1303. {
  1304. snprintf ( m_sParseError, sizeof(m_sParseError), "%s: integer weight expected near '%s'", sName, sVal );
  1305. SPH_RET(false);
  1306. }
  1307. pWeights[*pCount] = atoi(sVal);
  1308. (*pCount)++;
  1309. if ( !*p ) break;
  1310. if ( *p!=',' )
  1311. {
  1312. snprintf ( m_sParseError, sizeof(m_sParseError), "%s: comma expected near '%s'", sName, p );
  1313. SPH_RET(false);
  1314. }
  1315. p++;
  1316. }
  1317. } else if ( !strcmp ( sName, "geoanchor" ) )
  1318. {
  1319. m_bGeoAnchor = false;
  1320. for ( ;; )
  1321. {
  1322. char * sLat = sValue;
  1323. char * p = sValue;
  1324. if (!( p = strchr ( p, ',' ) )) break; *p++ = '\0';
  1325. char * sLong = p;
  1326. if (!( p = strchr ( p, ',' ) )) break; *p++ = '\0';
  1327. char * sLatVal = p;
  1328. if (!( p = strchr ( p, ',' ) )) break; *p++ = '\0';
  1329. char * sLongVal = p;
  1330. m_sGeoLatAttr = chop(sLat);
  1331. m_sGeoLongAttr = chop(sLong);
  1332. m_fGeoLatitude = (float)atof(sLatVal);
  1333. m_fGeoLongitude = (float)atof(sLongVal);
  1334. m_bGeoAnchor = true;
  1335. break;
  1336. }
  1337. if ( !m_bGeoAnchor )
  1338. {
  1339. snprintf ( m_sParseError, sizeof(m_sParseError), "geoanchor: parse error, not enough comma-separated arguments" );
  1340. SPH_RET(false);
  1341. }
  1342. }
  1343. else if ( !strcmp ( sName, "override" ) ) // name,type,id:value,id:value,...
  1344. {
  1345. char * sName = NULL;
  1346. int iType = 0;
  1347. CSphSEQuery::Override_t * pOverride = NULL;
  1348. // get name and type
  1349. char * sRest = sValue;
  1350. for ( ;; )
  1351. {
  1352. sName = sRest;
  1353. if ( !*sName )
  1354. break;
  1355. if (!( sRest = strchr ( sRest, ',' ) )) break; *sRest++ = '\0';
  1356. char * sType = sRest;
  1357. if (!( sRest = strchr ( sRest, ',' ) )) break;
  1358. static const struct
  1359. {
  1360. const char * m_sName;
  1361. int m_iType;
  1362. }
  1363. dAttrTypes[] =
  1364. {
  1365. { "int", SPH_ATTR_INTEGER },
  1366. { "timestamp", SPH_ATTR_TIMESTAMP },
  1367. { "bool", SPH_ATTR_BOOL },
  1368. { "float", SPH_ATTR_FLOAT },
  1369. { "bigint", SPH_ATTR_BIGINT }
  1370. };
  1371. for ( uint i=0; i<sizeof(dAttrTypes)/sizeof(*dAttrTypes); i++ )
  1372. if ( !strncmp( sType, dAttrTypes[i].m_sName, sRest - sType ) )
  1373. {
  1374. iType = dAttrTypes[i].m_iType;
  1375. break;
  1376. }
  1377. break;
  1378. }
  1379. // fail
  1380. if ( !sName || !*sName || !iType )
  1381. {
  1382. snprintf ( m_sParseError, sizeof(m_sParseError), "override: malformed query" );
  1383. SPH_RET(false);
  1384. }
  1385. // grab id:value pairs
  1386. sRest++;
  1387. while ( sRest )
  1388. {
  1389. char * sId = sRest;
  1390. if (!( sRest = strchr ( sRest, ':' ) )) break; *sRest++ = '\0';
  1391. if (!( sRest - sId )) break;
  1392. char * sValue = sRest;
  1393. if (( sRest = strchr ( sRest, ',' ) )) *sRest++ = '\0';
  1394. if ( !*sValue )
  1395. break;
  1396. if ( !pOverride )
  1397. {
  1398. pOverride = new CSphSEQuery::Override_t;
  1399. pOverride->m_sName = chop(sName);
  1400. pOverride->m_iType = iType;
  1401. m_dOverrides.append(pOverride);
  1402. }
  1403. ulonglong uId = strtoull ( sId, NULL, 10 );
  1404. CSphSEQuery::Override_t::Value_t tValue;
  1405. if ( iType == SPH_ATTR_FLOAT )
  1406. tValue.m_fValue = (float)atof(sValue);
  1407. else if ( iType == SPH_ATTR_BIGINT )
  1408. tValue.m_iValue64 = strtoll ( sValue, NULL, 10 );
  1409. else
  1410. tValue.m_uValue = (uint32)strtoul ( sValue, NULL, 10 );
  1411. pOverride->m_dIds.append ( uId );
  1412. pOverride->m_dValues.append ( tValue );
  1413. }
  1414. if ( !pOverride )
  1415. {
  1416. snprintf ( m_sParseError, sizeof(m_sParseError), "override: id:value mapping expected" );
  1417. SPH_RET(false);
  1418. }
  1419. SPH_RET(true);
  1420. }
  1421. else
  1422. {
  1423. snprintf ( m_sParseError, sizeof(m_sParseError), "unknown parameter '%s'", sName );
  1424. SPH_RET(false);
  1425. }
  1426. // !COMMIT handle syntax errors
  1427. SPH_RET(true);
  1428. }
  1429. bool CSphSEQuery::Parse ()
  1430. {
  1431. SPH_ENTER_METHOD();
  1432. SPH_DEBUG ( "query [[ %s ]]", m_sQueryBuffer );
  1433. m_bQuery = false;
  1434. char * pCur = m_sQueryBuffer;
  1435. char * pNext = pCur;
  1436. while (( pNext = strchr ( pNext, ';' ) ))
  1437. {
  1438. // handle escaped semicolons
  1439. if ( pNext>m_sQueryBuffer && pNext[-1]=='\\' && pNext[1]!='\0' )
  1440. {
  1441. pNext++;
  1442. continue;
  1443. }
  1444. // handle semicolon-separated clauses
  1445. *pNext++ = '\0';
  1446. if ( !ParseField ( pCur ) )
  1447. SPH_RET(false);
  1448. pCur = pNext;
  1449. }
  1450. SPH_RET(true);
  1451. }
  1452. void CSphSEQuery::SendBytes ( const void * pBytes, int iBytes )
  1453. {
  1454. SPH_ENTER_METHOD();
  1455. if ( m_iBufLeft<iBytes )
  1456. {
  1457. m_bBufOverrun = true;
  1458. SPH_VOID_RET();
  1459. }
  1460. memcpy ( m_pCur, pBytes, iBytes );
  1461. m_pCur += iBytes;
  1462. m_iBufLeft -= iBytes;
  1463. SPH_VOID_RET();
  1464. }
  1465. int CSphSEQuery::BuildRequest ( char ** ppBuffer )
  1466. {
  1467. SPH_ENTER_METHOD();
  1468. // calc request length
  1469. int iReqSize = 124 + 4*m_iWeights
  1470. + strlen ( m_sSortBy )
  1471. + strlen ( m_sQuery )
  1472. + strlen ( m_sIndex )
  1473. + strlen ( m_sGroupBy )
  1474. + strlen ( m_sGroupSortBy )
  1475. + strlen ( m_sGroupDistinct )
  1476. + strlen ( m_sComment );
  1477. for ( int i=0; i<m_iFilters; i++ )
  1478. {
  1479. const CSphSEFilter & tFilter = m_dFilters[i];
  1480. iReqSize += 12 + strlen ( tFilter.m_sAttrName ); // string attr-name; int type; int exclude-flag
  1481. switch ( tFilter.m_eType )
  1482. {
  1483. case SPH_FILTER_VALUES: iReqSize += 4 + 8*tFilter.m_iValues; break;
  1484. case SPH_FILTER_RANGE: iReqSize += 16; break;
  1485. case SPH_FILTER_FLOATRANGE: iReqSize += 8; break;
  1486. }
  1487. }
  1488. if ( m_bGeoAnchor ) // 1.14+
  1489. iReqSize += 16 + strlen ( m_sGeoLatAttr ) + strlen ( m_sGeoLongAttr );
  1490. for ( int i=0; i<m_iIndexWeights; i++ ) // 1.15+
  1491. iReqSize += 8 + strlen(m_sIndexWeight[i] );
  1492. for ( int i=0; i<m_iFieldWeights; i++ ) // 1.18+
  1493. iReqSize += 8 + strlen(m_sFieldWeight[i] );
  1494. // overrides
  1495. iReqSize += 4;
  1496. for ( int i=0; i<m_dOverrides.elements(); i++ )
  1497. {
  1498. CSphSEQuery::Override_t * pOverride = m_dOverrides.at(i);
  1499. const uint32 uSize = pOverride->m_iType == SPH_ATTR_BIGINT ? 16 : 12; // id64 + value
  1500. iReqSize += strlen ( pOverride->m_sName ) + 12 + uSize*pOverride->m_dIds.elements();
  1501. }
  1502. // select
  1503. iReqSize += 4;
  1504. m_iBufLeft = 0;
  1505. SafeDeleteArray ( m_pBuf );
  1506. m_pBuf = new char [ iReqSize ];
  1507. if ( !m_pBuf )
  1508. SPH_RET(-1);
  1509. m_pCur = m_pBuf;
  1510. m_iBufLeft = iReqSize;
  1511. m_bBufOverrun = false;
  1512. (*ppBuffer) = m_pBuf;
  1513. // build request
  1514. SendWord ( SEARCHD_COMMAND_SEARCH ); // command id
  1515. SendWord ( VER_COMMAND_SEARCH ); // command version
  1516. SendInt ( iReqSize-8 ); // packet body length
  1517. SendInt ( 1 ); // number of queries
  1518. SendInt ( m_iOffset );
  1519. SendInt ( m_iLimit );
  1520. SendInt ( m_eMode );
  1521. SendInt ( m_eRanker ); // 1.16+
  1522. SendInt ( m_eSort );
  1523. SendString ( m_sSortBy ); // sort attr
  1524. SendString ( m_sQuery ); // query
  1525. SendInt ( m_iWeights );
  1526. for ( int j=0; j<m_iWeights; j++ )
  1527. SendInt ( m_pWeights[j] ); // weights
  1528. SendString ( m_sIndex ); // indexes
  1529. SendInt ( 1 ); // id64 range follows
  1530. SendUint64 ( m_iMinID ); // id/ts ranges
  1531. SendUint64 ( m_iMaxID );
  1532. SendInt ( m_iFilters );
  1533. for ( int j=0; j<m_iFilters; j++ )
  1534. {
  1535. const CSphSEFilter & tFilter = m_dFilters[j];
  1536. SendString ( tFilter.m_sAttrName );
  1537. SendInt ( tFilter.m_eType );
  1538. switch ( tFilter.m_eType )
  1539. {
  1540. case SPH_FILTER_VALUES:
  1541. SendInt ( tFilter.m_iValues );
  1542. for ( int k=0; k<tFilter.m_iValues; k++ )
  1543. SendUint64 ( tFilter.m_pValues[k] );
  1544. break;
  1545. case SPH_FILTER_RANGE:
  1546. SendUint64 ( tFilter.m_uMinValue );
  1547. SendUint64 ( tFilter.m_uMaxValue );
  1548. break;
  1549. case SPH_FILTER_FLOATRANGE:
  1550. SendFloat ( tFilter.m_fMinValue );
  1551. SendFloat ( tFilter.m_fMaxValue );
  1552. break;
  1553. }
  1554. SendInt ( tFilter.m_bExclude );
  1555. }
  1556. SendInt ( m_eGroupFunc );
  1557. SendString ( m_sGroupBy );
  1558. SendInt ( m_iMaxMatches );
  1559. SendString ( m_sGroupSortBy );
  1560. SendInt ( m_iCutoff ); // 1.9+
  1561. SendInt ( m_iRetryCount ); // 1.10+
  1562. SendInt ( m_iRetryDelay );
  1563. SendString ( m_sGroupDistinct ); // 1.11+
  1564. SendInt ( m_bGeoAnchor ); // 1.14+
  1565. if ( m_bGeoAnchor )
  1566. {
  1567. SendString ( m_sGeoLatAttr );
  1568. SendString ( m_sGeoLongAttr );
  1569. SendFloat ( m_fGeoLatitude );
  1570. SendFloat ( m_fGeoLongitude );
  1571. }
  1572. SendInt ( m_iIndexWeights ); // 1.15+
  1573. for ( int i=0; i<m_iIndexWeights; i++ )
  1574. {
  1575. SendString ( m_sIndexWeight[i] );
  1576. SendInt ( m_iIndexWeight[i] );
  1577. }
  1578. SendInt ( m_iMaxQueryTime ); // 1.17+
  1579. SendInt ( m_iFieldWeights ); // 1.18+
  1580. for ( int i=0; i<m_iFieldWeights; i++ )
  1581. {
  1582. SendString ( m_sFieldWeight[i] );
  1583. SendInt ( m_iFieldWeight[i] );
  1584. }
  1585. SendString ( m_sComment );
  1586. // overrides
  1587. SendInt ( m_dOverrides.elements() );
  1588. for ( int i=0; i<m_dOverrides.elements(); i++ )
  1589. {
  1590. CSphSEQuery::Override_t * pOverride = m_dOverrides.at(i);
  1591. SendString ( pOverride->m_sName );
  1592. SendDword ( pOverride->m_iType );
  1593. SendInt ( pOverride->m_dIds.elements() );
  1594. for ( int j=0; j<pOverride->m_dIds.elements(); j++ )
  1595. {
  1596. SendUint64 ( pOverride->m_dIds.at(j) );
  1597. if ( pOverride->m_iType == SPH_ATTR_FLOAT )
  1598. SendFloat ( pOverride->m_dValues.at(j).m_fValue );
  1599. else if ( pOverride->m_iType == SPH_ATTR_BIGINT )
  1600. SendUint64 ( pOverride->m_dValues.at(j).m_iValue64 );
  1601. else
  1602. SendDword ( pOverride->m_dValues.at(j).m_uValue );
  1603. }
  1604. }
  1605. // select
  1606. SendString ( "" );
  1607. // detect buffer overruns and underruns, and report internal error
  1608. if ( m_bBufOverrun || m_iBufLeft!=0 || m_pCur-m_pBuf!=iReqSize )
  1609. SPH_RET(-1);
  1610. // all fine
  1611. SPH_RET(iReqSize);
  1612. }
  1613. //////////////////////////////////////////////////////////////////////////////
  1614. // SPHINX HANDLER
  1615. //////////////////////////////////////////////////////////////////////////////
  /// Extension list returned by ha_sphinx::bas_ext(); it holds only the NullS terminator.
  1616. static const char * ha_sphinx_exts[] = { NullS };
  1617. #if MYSQL_VERSION_ID<50100
  1618. ha_sphinx::ha_sphinx ( TABLE_ARG * table )
  1619. : handler ( &sphinx_hton, table )
  1620. #else
  1621. ha_sphinx::ha_sphinx ( handlerton * hton, TABLE_ARG * table )
  1622. : handler ( hton, table )
  1623. #endif
  1624. , m_pShare ( NULL )
  1625. , m_iMatchesTotal ( 0 )
  1626. , m_iCurrentPos ( 0 )
  1627. , m_pCurrentKey ( NULL )
  1628. , m_iCurrentKeyLen ( 0 )
  1629. , m_pResponse ( NULL )
  1630. , m_pResponseEnd ( NULL )
  1631. , m_pCur ( NULL )
  1632. , m_bUnpackError ( false )
  1633. , m_iFields ( 0 )
  1634. , m_dFields ( NULL )
  1635. , m_iAttrs ( 0 )
  1636. , m_dAttrs ( NULL )
  1637. , m_bId64 ( 0 )
  1638. , m_dUnboundFields ( NULL )
  1639. {
  1640. SPH_ENTER_METHOD();
  1641. SPH_VOID_RET();
  1642. }
  1643. // If frm_error() is called then we will use this to to find out what file extentions
  1644. // exist for the storage engine. This is also used by the default rename_table and
  1645. // delete_table method in handler.cc.
  1646. const char ** ha_sphinx::bas_ext() const
  1647. {
  1648. return ha_sphinx_exts;
  1649. }
  1650. // Used for opening tables. The name will be the name of the file.
  1651. // A table is opened when it needs to be opened. For instance
  1652. // when a request comes in for a select on the table (tables are not
  1653. // open and closed for each request, they are cached).
  1654. //
  1655. // Called from handler.cc by handler::ha_open(). The server opens all tables by
  1656. // calling ha_open() which then calls the handler specific open().
  1657. int ha_sphinx::open ( const char * name, int, uint )
  1658. {
  1659. SPH_ENTER_METHOD();
  1660. m_pShare = get_share ( name, table );
  1661. if ( !m_pShare )
  1662. SPH_RET(1);
  1663. thr_lock_data_init ( &m_pShare->m_tLock, &m_tLock, NULL );
  1664. *thd_ha_data ( table->in_use, ht ) = NULL;
  1665. SPH_RET(0);
  1666. }
  1667. int ha_sphinx::ConnectToSearchd ( const char * sQueryHost, int iQueryPort )
  1668. {
  1669. SPH_ENTER_METHOD();
  1670. struct sockaddr_in sin;
  1671. #ifndef __WIN__
  1672. struct sockaddr_un saun;
  1673. #endif
  1674. int iDomain = 0;
  1675. int iSockaddrSize = 0;
  1676. struct sockaddr * pSockaddr = NULL;
  1677. in_addr_t ip_addr;
  1678. int version;
  1679. uint uClientVersion = htonl ( SPHINX_SEARCHD_PROTO );
  1680. const char * sHost = ( sQueryHost && *sQueryHost ) ? sQueryHost : m_pShare->m_sHost;
  1681. ushort iPort = iQueryPort ? (ushort)iQueryPort : m_pShare->m_iPort;
  1682. if ( iPort )
  1683. {
  1684. iDomain = AF_INET;
  1685. iSockaddrSize = sizeof(sin);
  1686. pSockaddr = (struct sockaddr *) &sin;
  1687. memset ( &sin, 0, sizeof(sin) );
  1688. sin.sin_family = AF_INET;
  1689. sin.sin_port = htons(iPort);
  1690. // prepare host address
  1691. if ( (int)( ip_addr=inet_addr(sHost) ) != (int)INADDR_NONE )
  1692. {
  1693. memcpy ( &sin.sin_addr, &ip_addr, sizeof(ip_addr) );
  1694. } else
  1695. {
  1696. int tmp_errno;
  1697. struct hostent tmp_hostent, *hp;
  1698. char buff2 [ GETHOSTBYNAME_BUFF_SIZE ];
  1699. hp = my_gethostbyname_r ( sHost, &tmp_hostent,
  1700. buff2, sizeof(buff2), &tmp_errno );
  1701. if ( !hp )
  1702. {
  1703. my_gethostbyname_r_free();
  1704. char sError[256];
  1705. my_snprintf ( sError, sizeof(sError), "failed to resolve searchd host (name=%s)", sHost );
  1706. my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
  1707. SPH_RET(-1);
  1708. }
  1709. memcpy ( &sin.sin_addr, hp->h_addr,
  1710. Min ( sizeof(sin.sin_addr), (size_t)hp->h_length ) );
  1711. my_gethostbyname_r_free();
  1712. }
  1713. } else
  1714. {
  1715. #ifndef __WIN__
  1716. iDomain = AF_UNIX;
  1717. iSockaddrSize = sizeof(saun);
  1718. pSockaddr = (struct sockaddr *) &saun;
  1719. memset ( &saun, 0, sizeof(saun) );
  1720. saun.sun_family = AF_UNIX;
  1721. strncpy ( saun.sun_path, sHost, sizeof(saun.sun_path)-1 );
  1722. #else
  1723. my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), "UNIX sockets are not supported on Windows" );
  1724. SPH_RET(-1);
  1725. #endif
  1726. }
  1727. char sError[512];
  1728. int iSocket = socket ( iDomain, SOCK_STREAM, 0 );
  1729. if ( iSocket<0 )
  1730. {
  1731. my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), "failed to create client socket" );
  1732. SPH_RET(-1);
  1733. }
  1734. if ( connect ( iSocket, pSockaddr, iSockaddrSize )<0 )
  1735. {
  1736. sphSockClose ( iSocket );
  1737. my_snprintf ( sError, sizeof(sError), "failed to connect to searchd (host=%s, errno=%d, port=%d)",
  1738. sHost, errno, iPort );
  1739. my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
  1740. SPH_RET(-1);
  1741. }
  1742. if ( ::recv ( iSocket, (char *)&version, sizeof(version), 0 )!=sizeof(version) )
  1743. {
  1744. sphSockClose ( iSocket );
  1745. my_snprintf ( sError, sizeof(sError), "failed to receive searchd version (host=%s, port=%d)",
  1746. sHost, iPort );
  1747. my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
  1748. SPH_RET(-1);
  1749. }
  1750. if ( ::send ( iSocket, (char*)&uClientVersion, sizeof(uClientVersion), 0 )!=sizeof(uClientVersion) )
  1751. {
  1752. sphSockClose ( iSocket );
  1753. my_snprintf ( sError, sizeof(sError), "failed to send client version (host=%s, port=%d)",
  1754. sHost, iPort );
  1755. my_error ( ER_CONNECT_TO_FOREIGN_DATA_SOURCE, MYF(0), sError );
  1756. SPH_RET(-1);
  1757. }
  1758. SPH_RET(iSocket);
  1759. }
  1760. // Closes a table. We call the free_share() function to free any resources
  1761. // that we have allocated in the "shared" structure.
  1762. //
  1763. // Called from sql_base.cc, sql_select.cc, and table.cc.
  1764. // In sql_select.cc it is only used to close up temporary tables or during
  1765. // the process where a temporary table is converted over to being a
  1766. // myisam table.
  1767. // For sql_base.cc look at close_data_tables().
  1768. int ha_sphinx::close()
  1769. {
  1770. SPH_ENTER_METHOD();
  1771. SPH_RET ( free_share(m_pShare) );
  1772. }
  1773. int ha_sphinx::write_row ( uchar * )
  1774. {
  1775. SPH_ENTER_METHOD();
  1776. SPH_RET ( HA_ERR_WRONG_COMMAND );
  1777. }
  1778. int ha_sphinx::update_row ( const uchar *, uchar * )
  1779. {
  1780. SPH_ENTER_METHOD();
  1781. SPH_RET ( HA_ERR_WRONG_COMMAND );
  1782. }
  1783. int ha_sphinx::delete_row ( const uchar * )
  1784. {
  1785. SPH_ENTER_METHOD();
  1786. SPH_RET ( HA_ERR_WRONG_COMMAND );
  1787. }
  1788. // keynr is key (index) number
  1789. // sorted is 1 if result MUST be sorted according to index
  1790. int ha_sphinx::index_init ( uint keynr, bool )
  1791. {
  1792. SPH_ENTER_METHOD();
  1793. active_index = keynr;
  1794. SPH_RET(0);
  1795. }
  1796. int ha_sphinx::index_end()
  1797. {
  1798. SPH_ENTER_METHOD();
  1799. SPH_RET(0);
  1800. }
  1801. uint32 ha_sphinx::UnpackDword ()
  1802. {
  1803. if ( m_pCur+sizeof(uint32)>m_pResponseEnd )
  1804. {
  1805. m_pCur = m_pResponseEnd;
  1806. m_bUnpackError = true;
  1807. return 0;
  1808. }
  1809. uint32 uRes = ntohl ( sphUnalignedRead ( *(uint32*)m_pCur ) );
  1810. m_pCur += sizeof(uint32);
  1811. return uRes;
  1812. }
  1813. char * ha_sphinx::UnpackString ()
  1814. {
  1815. uint32 iLen = UnpackDword ();
  1816. if ( !iLen )
  1817. return NULL;
  1818. if ( m_pCur+iLen>m_pResponseEnd )
  1819. {
  1820. m_pCur = m_pResponseEnd;
  1821. m_bUnpackError = true;
  1822. return NULL;
  1823. }
  1824. char * sRes = new char [ 1+iLen ];
  1825. memcpy ( sRes, m_pCur, iLen );
  1826. sRes[iLen] = '\0';
  1827. m_pCur += iLen;
  1828. return sRes;
  1829. }
  /// Substitute a printable placeholder for a NULL string.
  /// @param s string to check, may be NULL
  /// @return s itself when non-NULL, otherwise the literal "(null)"
  static inline const char * FixNull ( const char * s )
  {
      if ( s )
          return s;
      return "(null)";
  }
  1834. bool ha_sphinx::UnpackSchema ()
  1835. {
  1836. SPH_ENTER_METHOD();
  1837. // cleanup
  1838. if ( m_dFields )
  1839. for ( int i=0; i<(int)m_iFields; i++ )
  1840. SafeDeleteArray ( m_dFields[i] );
  1841. SafeDeleteArray ( m_dFields );
  1842. // unpack network packet
  1843. uint32 uStatus = UnpackDword ();
  1844. char * sMessage = NULL;
  1845. if ( uStatus!=SEARCHD_OK )
  1846. {
  1847. sMessage = UnpackString ();
  1848. CSphSEThreadData * pTls = GetTls ();
  1849. if ( pTls )
  1850. {
  1851. strncpy ( pTls->m_tStats.m_sLastMessage, sMessage, sizeof(pTls->m_tStats.m_sLastMessage) );
  1852. pTls->m_tStats.m_bLastError = ( uStatus==SEARCHD_ERROR );
  1853. }
  1854. if ( uStatus==SEARCHD_ERROR )
  1855. {
  1856. char sError[1024];
  1857. my_snprintf ( sError, sizeof(sError), "searchd error: %s", sMessage );
  1858. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
  1859. SafeDeleteArray ( sMessage );
  1860. SPH_RET ( false );
  1861. }
  1862. }
  1863. m_iFields = UnpackDword ();
  1864. m_dFields = new char * [ m_iFields ];
  1865. if ( !m_dFields )
  1866. {
  1867. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackSchema() failed (fields alloc error)" );
  1868. SPH_RET(false);
  1869. }
  1870. for ( uint32 i=0; i<m_iFields; i++ )
  1871. m_dFields[i] = UnpackString ();
  1872. SafeDeleteArray ( m_dAttrs );
  1873. m_iAttrs = UnpackDword ();
  1874. m_dAttrs = new CSphSEAttr [ m_iAttrs ];
  1875. if ( !m_dAttrs )
  1876. {
  1877. for ( int i=0; i<(int)m_iFields; i++ )
  1878. SafeDeleteArray ( m_dFields[i] );
  1879. SafeDeleteArray ( m_dFields );
  1880. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackSchema() failed (attrs alloc error)" );
  1881. SPH_RET(false);
  1882. }
  1883. for ( uint32 i=0; i<m_iAttrs; i++ )
  1884. {
  1885. m_dAttrs[i].m_sName = UnpackString ();
  1886. m_dAttrs[i].m_uType = UnpackDword ();
  1887. if ( m_bUnpackError ) // m_sName may be null
  1888. break;
  1889. m_dAttrs[i].m_iField = -1;
  1890. for ( int j=SPHINXSE_SYSTEM_COLUMNS; j<m_pShare->m_iTableFields; j++ )
  1891. {
  1892. const char * sTableField = m_pShare->m_sTableField[j];
  1893. const char * sAttrField = m_dAttrs[i].m_sName;
  1894. if ( m_dAttrs[i].m_sName[0]=='@' )
  1895. {
  1896. const char * sAtPrefix = "_sph_";
  1897. if ( strncmp ( sTableField, sAtPrefix, strlen(sAtPrefix) ) )
  1898. continue;
  1899. sTableField += strlen(sAtPrefix);
  1900. sAttrField++;
  1901. }
  1902. if ( !strcasecmp ( sAttrField, sTableField ) )
  1903. {
  1904. // we're almost good, but
  1905. // let's enforce that timestamp columns can only receive timestamp attributes
  1906. if ( m_pShare->m_eTableFieldType[j]!=MYSQL_TYPE_TIMESTAMP || m_dAttrs[i].m_uType==SPH_ATTR_TIMESTAMP )
  1907. m_dAttrs[i].m_iField = j;
  1908. break;
  1909. }
  1910. }
  1911. }
  1912. m_iMatchesTotal = UnpackDword ();
  1913. m_bId64 = UnpackDword ();
  1914. if ( m_bId64 && m_pShare->m_eTableFieldType[0] != MYSQL_TYPE_LONGLONG )
  1915. {
  1916. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: 1st column must be bigint to accept 64-bit DOCIDs" );
  1917. SPH_RET(false);
  1918. }
  1919. // network packet unpacked; build unbound fields map
  1920. SafeDeleteArray ( m_dUnboundFields );
  1921. m_dUnboundFields = new int [ m_pShare->m_iTableFields ];
  1922. for ( int i=0; i<m_pShare->m_iTableFields; i++ )
  1923. {
  1924. if ( i<SPHINXSE_SYSTEM_COLUMNS )
  1925. m_dUnboundFields[i] = SPH_ATTR_NONE;
  1926. else if ( m_pShare->m_eTableFieldType[i]==MYSQL_TYPE_TIMESTAMP )
  1927. m_dUnboundFields[i] = SPH_ATTR_TIMESTAMP;
  1928. else
  1929. m_dUnboundFields[i] = SPH_ATTR_INTEGER;
  1930. }
  1931. for ( uint32 i=0; i<m_iAttrs; i++ )
  1932. if ( m_dAttrs[i].m_iField>=0 )
  1933. m_dUnboundFields [ m_dAttrs[i].m_iField ] = SPH_ATTR_NONE;
  1934. if ( m_bUnpackError )
  1935. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackSchema() failed (unpack error)" );
  1936. SPH_RET(!m_bUnpackError);
  1937. }
  1938. bool ha_sphinx::UnpackStats ( CSphSEStats * pStats )
  1939. {
  1940. assert ( pStats );
  1941. char * pCurSave = m_pCur;
  1942. for ( uint i=0; i<m_iMatchesTotal && m_pCur<m_pResponseEnd-sizeof(uint32); i++ )
  1943. {
  1944. m_pCur += m_bId64 ? 12 : 8; // skip id+weight
  1945. for ( uint32 i=0; i<m_iAttrs && m_pCur<m_pResponseEnd-sizeof(uint32); i++ )
  1946. {
  1947. if ( m_dAttrs[i].m_uType & SPH_ATTR_MULTI )
  1948. {
  1949. // skip MVA list
  1950. uint32 uCount = UnpackDword ();
  1951. m_pCur += uCount*4;
  1952. }
  1953. else // skip normal value
  1954. m_pCur += m_dAttrs[i].m_uType == SPH_ATTR_BIGINT ? 8 : 4;
  1955. }
  1956. }
  1957. pStats->m_iMatchesTotal = UnpackDword ();
  1958. pStats->m_iMatchesFound = UnpackDword ();
  1959. pStats->m_iQueryMsec = UnpackDword ();
  1960. pStats->m_iWords = UnpackDword ();
  1961. if ( m_bUnpackError )
  1962. return false;
  1963. SafeDeleteArray ( pStats->m_dWords );
  1964. if ( pStats->m_iWords<0 || pStats->m_iWords>=SPHINXSE_MAX_KEYWORDSTATS )
  1965. return false;
  1966. pStats->m_dWords = new CSphSEWordStats [ pStats->m_iWords ];
  1967. if ( !pStats->m_dWords )
  1968. return false;
  1969. for ( int i=0; i<pStats->m_iWords; i++ )
  1970. {
  1971. CSphSEWordStats & tWord = pStats->m_dWords[i];
  1972. tWord.m_sWord = UnpackString ();
  1973. tWord.m_iDocs = UnpackDword ();
  1974. tWord.m_iHits = UnpackDword ();
  1975. }
  1976. if ( m_bUnpackError )
  1977. return false;
  1978. m_pCur = pCurSave;
  1979. return true;
  1980. }
  1981. /// condition pushdown implementation, to properly intercept WHERE clauses on my columns
  1982. const COND * ha_sphinx::cond_push ( const COND * cond )
  1983. {
  1984. // catch the simplest case: query_column="some text"
  1985. for ( ;; )
  1986. {
  1987. if ( cond->type()!=COND::FUNC_ITEM )
  1988. break;
  1989. Item_func * condf = (Item_func *)cond;
  1990. if ( condf->functype()!=Item_func::EQ_FUNC || condf->argument_count()!=2 )
  1991. break;
  1992. Item ** args = condf->arguments();
  1993. if ( args[0]->type()!=COND::FIELD_ITEM || args[1]->type()!=COND::STRING_ITEM )
  1994. break;
  1995. Item_field * pField = (Item_field *) args[0];
  1996. if ( pField->field->field_index!=2 ) // FIXME! magic key index
  1997. break;
  1998. // get my tls
  1999. CSphSEThreadData * pTls = GetTls ();
  2000. if ( !pTls )
  2001. break;
  2002. // copy the query, and let know that we intercepted this condition
  2003. Item_string * pString = (Item_string *) args[1];
  2004. pTls->m_bQuery = true;
  2005. strncpy ( pTls->m_sQuery, pString->str_value.c_ptr(), sizeof(pTls->m_sQuery) );
  2006. pTls->m_sQuery[sizeof(pTls->m_sQuery)-1] = '\0';
  2007. pTls->m_pQueryCharset = pString->str_value.charset();
  2008. return NULL;
  2009. }
  2010. // don't change anything
  2011. return cond;
  2012. }
  2013. /// condition popup
  2014. void ha_sphinx::cond_pop ()
  2015. {
  2016. CSphSEThreadData * pTls = GetTls ();
  2017. if ( pTls && pTls->m_bQuery )
  2018. pTls->m_bQuery = false;
  2019. return;
  2020. }
  2021. /// get TLS (maybe allocate it, too)
  2022. CSphSEThreadData * ha_sphinx::GetTls()
  2023. {
  2024. // where do we store that pointer in today's version?
  2025. CSphSEThreadData ** ppTls;
  2026. ppTls = (CSphSEThreadData**) thd_ha_data ( ha_thd(), ht );
  2027. // allocate if needed
  2028. if ( !*ppTls )
  2029. *ppTls = new CSphSEThreadData ();
  2030. // errors will be handled by caller
  2031. return *ppTls;
  2032. }
  2033. // Positions an index cursor to the index specified in the handle. Fetches the
  2034. // row if available. If the key value is null, begin at the first key of the
  2035. // index.
  2036. int ha_sphinx::index_read ( byte * buf, const byte * key, uint key_len, enum ha_rkey_function )
  2037. {
  2038. SPH_ENTER_METHOD();
  2039. char sError[256];
  2040. // set new data for thd->ha_data, it is used in show_status
  2041. CSphSEThreadData * pTls = GetTls();
  2042. if ( !pTls )
  2043. {
  2044. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: TLS malloc() failed" );
  2045. SPH_RET ( HA_ERR_END_OF_FILE );
  2046. }
  2047. pTls->m_tStats.Reset ();
  2048. // parse query
  2049. if ( pTls->m_bQuery )
  2050. {
  2051. // we have a query from condition pushdown
  2052. m_pCurrentKey = (const byte *) pTls->m_sQuery;
  2053. m_iCurrentKeyLen = strlen(pTls->m_sQuery);
  2054. } else
  2055. {
  2056. // just use the key (might be truncated)
  2057. m_pCurrentKey = key+HA_KEY_BLOB_LENGTH;
  2058. m_iCurrentKeyLen = uint2korr(key); // or maybe key_len?
  2059. pTls->m_pQueryCharset = m_pShare ? m_pShare->m_pTableQueryCharset : NULL;
  2060. }
  2061. CSphSEQuery q ( (const char*)m_pCurrentKey, m_iCurrentKeyLen, m_pShare->m_sIndex );
  2062. if ( !q.Parse () )
  2063. {
  2064. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), q.m_sParseError );
  2065. SPH_RET ( HA_ERR_END_OF_FILE );
  2066. }
  2067. // do connect
  2068. int iSocket = ConnectToSearchd ( q.m_sHost, q.m_iPort );
  2069. if ( iSocket<0 )
  2070. SPH_RET ( HA_ERR_END_OF_FILE );
  2071. // my buffer
  2072. char * pBuffer; // will be free by CSphSEQuery dtor; do NOT free manually
  2073. int iReqLen = q.BuildRequest ( &pBuffer );
  2074. if ( iReqLen<=0 )
  2075. {
  2076. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: q.BuildRequest() failed" );
  2077. SPH_RET ( HA_ERR_END_OF_FILE );
  2078. }
  2079. // send request
  2080. ::send ( iSocket, pBuffer, iReqLen, 0 );
  2081. // receive reply
  2082. char sHeader[8];
  2083. int iGot = ::recv ( iSocket, sHeader, sizeof(sHeader), RECV_FLAGS );
  2084. if ( iGot!=sizeof(sHeader) )
  2085. {
  2086. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "failed to receive response header (searchd went away?)" );
  2087. SPH_RET ( HA_ERR_END_OF_FILE );
  2088. }
  2089. short int uRespStatus = ntohs ( sphUnalignedRead ( *(short int*)( &sHeader[0] ) ) );
  2090. short int uRespVersion = ntohs ( sphUnalignedRead ( *(short int*)( &sHeader[2] ) ) );
  2091. uint uRespLength = ntohl ( sphUnalignedRead ( *(uint *)( &sHeader[4] ) ) );
  2092. SPH_DEBUG ( "got response header (status=%d version=%d length=%d)",
  2093. uRespStatus, uRespVersion, uRespLength );
  2094. SafeDeleteArray ( m_pResponse );
  2095. if ( uRespLength<=SPHINXSE_MAX_ALLOC )
  2096. m_pResponse = new char [ uRespLength+1 ];
  2097. if ( !m_pResponse )
  2098. {
  2099. my_snprintf ( sError, sizeof(sError), "bad searchd response length (length=%u)", uRespLength );
  2100. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
  2101. SPH_RET ( HA_ERR_END_OF_FILE );
  2102. }
  2103. int iRecvLength = 0;
  2104. while ( iRecvLength<(int)uRespLength )
  2105. {
  2106. int iRecv = ::recv ( iSocket, m_pResponse+iRecvLength, uRespLength-iRecvLength, RECV_FLAGS );
  2107. if ( iRecv<0 )
  2108. break;
  2109. iRecvLength += iRecv;
  2110. }
  2111. ::closesocket ( iSocket );
  2112. iSocket = -1;
  2113. if ( iRecvLength!=(int)uRespLength )
  2114. {
  2115. my_snprintf ( sError, sizeof(sError), "net read error (expected=%d, got=%d)", uRespLength, iRecvLength );
  2116. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
  2117. SPH_RET ( HA_ERR_END_OF_FILE );
  2118. }
  2119. // we'll have a message, at least
  2120. pTls->m_bStats = true;
  2121. // parse reply
  2122. m_iCurrentPos = 0;
  2123. m_pCur = m_pResponse;
  2124. m_pResponseEnd = m_pResponse + uRespLength;
  2125. m_bUnpackError = false;
  2126. if ( uRespStatus!=SEARCHD_OK )
  2127. {
  2128. char * sMessage = UnpackString ();
  2129. if ( !sMessage )
  2130. {
  2131. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "no valid response from searchd (status=%d, resplen=%d)",
  2132. uRespStatus, uRespLength );
  2133. SPH_RET ( HA_ERR_END_OF_FILE );
  2134. }
  2135. strncpy ( pTls->m_tStats.m_sLastMessage, sMessage, sizeof(pTls->m_tStats.m_sLastMessage) );
  2136. SafeDeleteArray ( sMessage );
  2137. if ( uRespStatus!=SEARCHD_WARNING )
  2138. {
  2139. my_snprintf ( sError, sizeof(sError), "searchd error: %s", pTls->m_tStats.m_sLastMessage );
  2140. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), sError );
  2141. pTls->m_tStats.m_bLastError = true;
  2142. SPH_RET ( HA_ERR_END_OF_FILE );
  2143. }
  2144. }
  2145. if ( !UnpackSchema () )
  2146. SPH_RET ( HA_ERR_END_OF_FILE );
  2147. if ( !UnpackStats ( &pTls->m_tStats ) )
  2148. {
  2149. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: UnpackStats() failed" );
  2150. SPH_RET ( HA_ERR_END_OF_FILE );
  2151. }
  2152. SPH_RET ( get_rec ( buf, key, key_len ) );
  2153. }
  2154. // Positions an index cursor to the index specified in key. Fetches the
  2155. // row if any. This is only used to read whole keys.
  2156. int ha_sphinx::index_read_idx ( byte *, uint, const byte *, uint, enum ha_rkey_function )
  2157. {
  2158. SPH_ENTER_METHOD();
  2159. SPH_RET ( HA_ERR_WRONG_COMMAND );
  2160. }
  2161. // Used to read forward through the index.
  2162. int ha_sphinx::index_next ( byte * buf )
  2163. {
  2164. SPH_ENTER_METHOD();
  2165. SPH_RET ( get_rec ( buf, m_pCurrentKey, m_iCurrentKeyLen ) );
  2166. }
  2167. int ha_sphinx::index_next_same ( byte * buf, const byte * key, uint keylen )
  2168. {
  2169. SPH_ENTER_METHOD();
  2170. SPH_RET ( get_rec ( buf, key, keylen ) );
  2171. }
  2172. int ha_sphinx::get_rec ( byte * buf, const byte *, uint )
  2173. {
  2174. SPH_ENTER_METHOD();
  2175. if ( m_iCurrentPos>=m_iMatchesTotal )
  2176. {
  2177. SafeDeleteArray ( m_pResponse );
  2178. SPH_RET ( HA_ERR_END_OF_FILE );
  2179. }
  2180. #if MYSQL_VERSION_ID>50100
  2181. my_bitmap_map * org_bitmap = dbug_tmp_use_all_columns ( table, table->write_set );
  2182. #endif
  2183. Field ** field = table->field;
  2184. // unpack and return the match
  2185. longlong uMatchID = UnpackDword ();
  2186. if ( m_bId64 )
  2187. uMatchID = ( uMatchID<<32 ) + UnpackDword();
  2188. uint32 uMatchWeight = UnpackDword ();
  2189. field[0]->store ( uMatchID, 1 );
  2190. field[1]->store ( uMatchWeight, 1 );
  2191. field[2]->store ( (const char*)m_pCurrentKey, m_iCurrentKeyLen, &my_charset_bin );
  2192. for ( uint32 i=0; i<m_iAttrs; i++ )
  2193. {
  2194. longlong iValue64= 0;
  2195. uint32 uValue = UnpackDword ();
  2196. if ( m_dAttrs[i].m_uType == SPH_ATTR_BIGINT )
  2197. iValue64 = ( (longlong)uValue<<32 ) | UnpackDword();
  2198. if ( m_dAttrs[i].m_iField<0 )
  2199. {
  2200. // skip MVA
  2201. if ( m_dAttrs[i].m_uType & SPH_ATTR_MULTI )
  2202. for ( ; uValue>0 && !m_bUnpackError; uValue-- )
  2203. UnpackDword();
  2204. continue;
  2205. }
  2206. Field * af = field [ m_dAttrs[i].m_iField ];
  2207. switch ( m_dAttrs[i].m_uType )
  2208. {
  2209. case SPH_ATTR_INTEGER:
  2210. case SPH_ATTR_ORDINAL:
  2211. case SPH_ATTR_BOOL:
  2212. af->store ( uValue, 1 );
  2213. break;
  2214. case SPH_ATTR_FLOAT:
  2215. af->store ( sphDW2F(uValue) );
  2216. break;
  2217. case SPH_ATTR_TIMESTAMP:
  2218. if ( af->type()==MYSQL_TYPE_TIMESTAMP )
  2219. longstore ( af->ptr, uValue ); // because store() does not accept timestamps
  2220. else
  2221. af->store ( uValue, 1 );
  2222. break;
  2223. case SPH_ATTR_BIGINT:
  2224. af->store ( iValue64, 0 );
  2225. break;
  2226. case ( SPH_ATTR_MULTI | SPH_ATTR_INTEGER ):
  2227. if ( uValue<=0 )
  2228. {
  2229. // shortcut, empty MVA set
  2230. af->store ( "", 0, &my_charset_bin );
  2231. } else
  2232. {
  2233. // convert MVA set to comma-separated string
  2234. char sBuf[1024]; // FIXME! magic size
  2235. char * pCur = sBuf;
  2236. for ( ; uValue>0 && !m_bUnpackError; uValue-- )
  2237. {
  2238. uint32 uEntry = UnpackDword ();
  2239. if ( pCur < sBuf+sizeof(sBuf)-16 ) // 10 chars per 32bit value plus some safety bytes
  2240. {
  2241. sprintf ( pCur, "%u", uEntry );
  2242. while ( *pCur ) pCur++;
  2243. if ( uValue>1 )
  2244. *pCur++ = ','; // non-trailing commas
  2245. }
  2246. }
  2247. af->store ( sBuf, pCur-sBuf, &my_charset_bin );
  2248. }
  2249. break;
  2250. default:
  2251. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: unhandled attr type" );
  2252. SafeDeleteArray ( m_pResponse );
  2253. SPH_RET ( HA_ERR_END_OF_FILE );
  2254. }
  2255. }
  2256. if ( m_bUnpackError )
  2257. {
  2258. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0), "INTERNAL ERROR: response unpacker failed" );
  2259. SafeDeleteArray ( m_pResponse );
  2260. SPH_RET ( HA_ERR_END_OF_FILE );
  2261. }
  2262. // zero out unmapped fields
  2263. for ( int i=SPHINXSE_SYSTEM_COLUMNS; i<(int)table->s->fields; i++ )
  2264. if ( m_dUnboundFields[i]!=SPH_ATTR_NONE )
  2265. switch ( m_dUnboundFields[i] )
  2266. {
  2267. case SPH_ATTR_INTEGER: table->field[i]->store ( 0, 1 ); break;
  2268. case SPH_ATTR_TIMESTAMP: longstore ( table->field[i]->ptr, 0 ); break;
  2269. default:
  2270. my_error ( ER_QUERY_ON_FOREIGN_DATA_SOURCE, MYF(0),
  2271. "INTERNAL ERROR: unhandled unbound field type %d", m_dUnboundFields[i] );
  2272. SafeDeleteArray ( m_pResponse );
  2273. SPH_RET ( HA_ERR_END_OF_FILE );
  2274. }
  2275. memset ( buf, 0, table->s->null_bytes );
  2276. m_iCurrentPos++;
  2277. #if MYSQL_VERSION_ID > 50100
  2278. dbug_tmp_restore_column_map(table->write_set, org_bitmap);
  2279. #endif
  2280. SPH_RET(0);
  2281. }
  2282. // Used to read backwards through the index.
  2283. int ha_sphinx::index_prev ( byte * )
  2284. {
  2285. SPH_ENTER_METHOD();
  2286. SPH_RET ( HA_ERR_WRONG_COMMAND );
  2287. }
  2288. // index_first() asks for the first key in the index.
  2289. //
  2290. // Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
  2291. // and sql_select.cc.
  2292. int ha_sphinx::index_first ( byte * )
  2293. {
  2294. SPH_ENTER_METHOD();
  2295. SPH_RET ( HA_ERR_END_OF_FILE );
  2296. }
  2297. // index_last() asks for the last key in the index.
  2298. //
  2299. // Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
  2300. // and sql_select.cc.
  2301. int ha_sphinx::index_last ( byte * )
  2302. {
  2303. SPH_ENTER_METHOD();
  2304. SPH_RET ( HA_ERR_WRONG_COMMAND );
  2305. }
  2306. int ha_sphinx::rnd_init ( bool )
  2307. {
  2308. SPH_ENTER_METHOD();
  2309. SPH_RET(0);
  2310. }
  2311. int ha_sphinx::rnd_end()
  2312. {
  2313. SPH_ENTER_METHOD();
  2314. SPH_RET(0);
  2315. }
  2316. int ha_sphinx::rnd_next ( byte * )
  2317. {
  2318. SPH_ENTER_METHOD();
  2319. SPH_RET ( HA_ERR_END_OF_FILE );
  2320. }
  2321. void ha_sphinx::position ( const byte * )
  2322. {
  2323. SPH_ENTER_METHOD();
  2324. SPH_VOID_RET();
  2325. }
  2326. // This is like rnd_next, but you are given a position to use
  2327. // to determine the row. The position will be of the type that you stored in
  2328. // ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
  2329. // or position you saved when position() was called.
  2330. // Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
  2331. int ha_sphinx::rnd_pos ( byte *, byte * )
  2332. {
  2333. SPH_ENTER_METHOD();
  2334. SPH_RET ( HA_ERR_WRONG_COMMAND );
  2335. }
  2336. #if MYSQL_VERSION_ID>=50030
  2337. int ha_sphinx::info ( uint )
  2338. #else
  2339. void ha_sphinx::info ( uint )
  2340. #endif
  2341. {
  2342. SPH_ENTER_METHOD();
  2343. if ( table->s->keys>0 )
  2344. table->key_info[0].rec_per_key[0] = 1;
  2345. #if MYSQL_VERSION_ID>50100
  2346. stats.records = 20;
  2347. #else
  2348. records = 20;
  2349. #endif
  2350. #if MYSQL_VERSION_ID>=50030
  2351. SPH_RET(0);
  2352. #else
  2353. SPH_VOID_RET();
  2354. #endif
  2355. }
  2356. int ha_sphinx::reset ()
  2357. {
  2358. SPH_ENTER_METHOD();
  2359. CSphSEThreadData * pTls = GetTls ();
  2360. if ( pTls )
  2361. pTls->m_bQuery = false;
  2362. SPH_RET(0);
  2363. }
  2364. int ha_sphinx::delete_all_rows()
  2365. {
  2366. SPH_ENTER_METHOD();
  2367. SPH_RET ( HA_ERR_WRONG_COMMAND );
  2368. }
  2369. // First you should go read the section "locking functions for mysql" in
  2370. // lock.cc to understand this.
  2371. // This create a lock on the table. If you are implementing a storage engine
  2372. // that can handle transacations look at ha_berkely.cc to see how you will
  2373. // want to go about doing this. Otherwise you should consider calling flock()
  2374. // here.
  2375. //
  2376. // Called from lock.cc by lock_external() and unlock_external(). Also called
  2377. // from sql_table.cc by copy_data_between_tables().
  2378. int ha_sphinx::external_lock ( THD *, int )
  2379. {
  2380. SPH_ENTER_METHOD();
  2381. SPH_RET(0);
  2382. }
  2383. THR_LOCK_DATA ** ha_sphinx::store_lock ( THD *, THR_LOCK_DATA ** to,
  2384. enum thr_lock_type lock_type )
  2385. {
  2386. SPH_ENTER_METHOD();
  2387. if ( lock_type!=TL_IGNORE && m_tLock.type==TL_UNLOCK )
  2388. m_tLock.type=lock_type;
  2389. *to++ = &m_tLock;
  2390. SPH_RET(to);
  2391. }
  2392. int ha_sphinx::delete_table ( const char * )
  2393. {
  2394. SPH_ENTER_METHOD();
  2395. SPH_RET(0);
  2396. }
  2397. // Renames a table from one name to another from alter table call.
  2398. //
  2399. // If you do not implement this, the default rename_table() is called from
  2400. // handler.cc and it will delete all files with the file extentions returned
  2401. // by bas_ext().
  2402. //
  2403. // Called from sql_table.cc by mysql_rename_table().
  2404. int ha_sphinx::rename_table ( const char *, const char * )
  2405. {
  2406. SPH_ENTER_METHOD();
  2407. SPH_RET(0);
  2408. }
  2409. // Given a starting key, and an ending key estimate the number of rows that
  2410. // will exist between the two. end_key may be empty which in case determine
  2411. // if start_key matches any rows.
  2412. //
  2413. // Called from opt_range.cc by check_quick_keys().
  2414. ha_rows ha_sphinx::records_in_range ( uint, key_range *, key_range * )
  2415. {
  2416. SPH_ENTER_METHOD();
  2417. SPH_RET(3); // low number to force index usage
  2418. }
  2419. static inline bool IsIntegerFieldType ( enum_field_types eType )
  2420. {
  2421. return eType==MYSQL_TYPE_LONG || eType==MYSQL_TYPE_LONGLONG;
  2422. }
  2423. // create() is called to create a database. The variable name will have the name
  2424. // of the table. When create() is called you do not need to worry about opening
  2425. // the table. Also, the FRM file will have already been created so adjusting
  2426. // create_info will not do you any good. You can overwrite the frm file at this
  2427. // point if you wish to change the table definition, but there are no methods
  2428. // currently provided for doing that.
  2429. //
  2430. // Called from handle.cc by ha_create_table().
  2431. int ha_sphinx::create ( const char * name, TABLE * table, HA_CREATE_INFO * )
  2432. {
  2433. SPH_ENTER_METHOD();
  2434. char sError[256];
  2435. if ( !ParseUrl ( NULL, table, true ) )
  2436. SPH_RET(-1);
  2437. for ( ;; )
  2438. {
  2439. // check system fields (count and types)
  2440. if ( table->s->fields<SPHINXSE_SYSTEM_COLUMNS )
  2441. {
  2442. my_snprintf ( sError, sizeof(sError), "%s: there MUST be at least %d columns",
  2443. name, SPHINXSE_SYSTEM_COLUMNS );
  2444. break;
  2445. }
  2446. if ( !IsIntegerFieldType ( table->field[0]->type() ) || !((Field_num *)table->field[0])->unsigned_flag )
  2447. {
  2448. my_snprintf ( sError, sizeof(sError), "%s: 1st column (docid) MUST be unsigned integer or bigint", name );
  2449. break;
  2450. }
  2451. if ( !IsIntegerFieldType ( table->field[1]->type() ) )
  2452. {
  2453. my_snprintf ( sError, sizeof(sError), "%s: 2nd column (weight) MUST be integer or bigint", name );
  2454. break;
  2455. }
  2456. enum_field_types f2 = table->field[2]->type();
  2457. if ( f2!=MYSQL_TYPE_VARCHAR
  2458. && f2!=MYSQL_TYPE_BLOB && f2!=MYSQL_TYPE_MEDIUM_BLOB && f2!=MYSQL_TYPE_LONG_BLOB && f2!=MYSQL_TYPE_TINY_BLOB )
  2459. {
  2460. my_snprintf ( sError, sizeof(sError), "%s: 3rd column (search query) MUST be varchar or text", name );
  2461. break;
  2462. }
  2463. // check attributes
  2464. int i;
  2465. for ( i=3; i<(int)table->s->fields; i++ )
  2466. {
  2467. enum_field_types eType = table->field[i]->type();
  2468. if ( eType!=MYSQL_TYPE_TIMESTAMP && !IsIntegerFieldType(eType) && eType!=MYSQL_TYPE_VARCHAR && eType!=MYSQL_TYPE_FLOAT )
  2469. {
  2470. my_snprintf ( sError, sizeof(sError), "%s: %dth column (attribute %s) MUST be integer, bigint, timestamp, varchar, or float",
  2471. name, i+1, table->field[i]->field_name );
  2472. break;
  2473. }
  2474. }
  2475. if ( i!=(int)table->s->fields )
  2476. break;
  2477. // check index
  2478. if (
  2479. table->s->keys!=1 ||
  2480. table->key_info[0].key_parts!=1 ||
  2481. strcasecmp ( table->key_info[0].key_part[0].field->field_name, table->field[2]->field_name ) )
  2482. {
  2483. my_snprintf ( sError, sizeof(sError), "%s: there must be an index on '%s' column",
  2484. name, table->field[2]->field_name );
  2485. break;
  2486. }
  2487. // all good
  2488. sError[0] = '\0';
  2489. break;
  2490. }
  2491. if ( sError[0] )
  2492. {
  2493. my_error ( ER_CANT_CREATE_TABLE, MYF(0), sError, -1 );
  2494. SPH_RET(-1);
  2495. }
  2496. SPH_RET(0);
  2497. }
  2498. //// show functions
  2499. #if MYSQL_VERSION_ID<50100
  2500. #define SHOW_VAR_FUNC_BUFF_SIZE 1024
  2501. #endif
  2502. static int sphinx_showfunc ( THD * thd, SHOW_VAR * out, char * sBuffer )
  2503. {
  2504. CSphSEThreadData *pTls = (CSphSEThreadData *) *thd_ha_data ( thd, sphinx_hton_ptr );
  2505. CSphSEStats * pStats = ( pTls && pTls->m_bStats ) ? &pTls->m_tStats : 0;
  2506. SHOW_VAR *array = (SHOW_VAR*)thd_alloc(thd, sizeof(SHOW_VAR)*7);
  2507. out->type = SHOW_ARRAY;
  2508. out->value = (char*)array;
  2509. if (pStats)
  2510. {
  2511. array[0].name = "total";
  2512. array[0].type = SHOW_INT;
  2513. array[0].value = (char *) &pStats->m_iMatchesTotal;
  2514. array[1].name = "total_found";
  2515. array[1].type = SHOW_INT;
  2516. array[1].value = (char *) &pStats->m_iMatchesFound;
  2517. array[2].name = "time";
  2518. array[2].type = SHOW_INT;
  2519. array[2].value = (char *) &pStats->m_iQueryMsec;
  2520. array[3].name = "word_count";
  2521. array[3].type = SHOW_INT;
  2522. array[3].value = (char *) &pStats->m_iWords;
  2523. array[4].name = "error";
  2524. array[4].type = SHOW_CHAR;
  2525. array[4].value = (char *) &pStats->m_sLastMessage;
  2526. array[5].name = "words";
  2527. array[5].type = SHOW_CHAR;
  2528. array[5].value = sBuffer;
  2529. sBuffer[0] = 0;
  2530. if ( pStats->m_iWords )
  2531. {
  2532. uint uBuffLen = 0;
  2533. // the following is partially based on code in sphinx_show_status()
  2534. for ( int i=0; i<pStats->m_iWords; i++ )
  2535. {
  2536. CSphSEWordStats & tWord = pStats->m_dWords[i];
  2537. uBuffLen = my_snprintf ( sBuffer, SHOW_VAR_FUNC_BUFF_SIZE, "%s%s:%d:%d ", sBuffer,
  2538. tWord.m_sWord, tWord.m_iDocs, tWord.m_iHits );
  2539. }
  2540. if ( uBuffLen > 0 )
  2541. {
  2542. // trim last space
  2543. sBuffer [ --uBuffLen ] = 0;
  2544. if ( pTls->m_pQueryCharset )
  2545. {
  2546. // String::c_ptr() will nul-terminate the buffer.
  2547. //
  2548. // NOTE: It's not entirely clear whether this conversion is necessary at all.
  2549. String sConvert;
  2550. uint iErrors;
  2551. sConvert.copy ( sBuffer, uBuffLen, pTls->m_pQueryCharset, system_charset_info, &iErrors );
  2552. memcpy ( sBuffer, sConvert.c_ptr(), sConvert.length() + 1 );
  2553. }
  2554. }
  2555. }
  2556. array[6].name = 0; // terminate the array
  2557. }
  2558. else
  2559. array[0].name = 0;
  2560. return 0;
  2561. }
  #if MYSQL_VERSION_ID>50100

  /// storage engine descriptor referenced by the plugin declarations below
  struct st_mysql_storage_engine sphinx_storage_engine =
  {
      MYSQL_HANDLERTON_INTERFACE_VERSION
  };

  /// status variables published by the plugin; "sphinx" is computed by sphinx_showfunc()
  struct st_mysql_show_var sphinx_status_vars[] =
  {
      { "sphinx", (char *)sphinx_showfunc, SHOW_FUNC },
      { NULL, NULL, (enum_mysql_show_type)0 }
  };

  /// plugin declaration in the MySQL format
  mysql_declare_plugin(sphinx)
  {
      MYSQL_STORAGE_ENGINE_PLUGIN,
      &sphinx_storage_engine,
      sphinx_hton_name,
      "Sphinx developers",
      sphinx_hton_comment,
      PLUGIN_LICENSE_GPL,
      sphinx_init_func,       // Plugin Init
      sphinx_done_func,       // Plugin Deinit
      0x0001,                 // 0.1
      sphinx_status_vars,
      NULL,
      NULL
  }
  mysql_declare_plugin_end;

  #ifdef maria_declare_plugin
  /// plugin declaration in the MariaDB format, emitted only when the server headers provide it
  maria_declare_plugin(sphinx)
  {
      MYSQL_STORAGE_ENGINE_PLUGIN,
      &sphinx_storage_engine,
      sphinx_hton_name,
      "Sphinx developers",
      sphinx_hton_comment,
      PLUGIN_LICENSE_GPL,
      sphinx_init_func,       // Plugin Init
      sphinx_done_func,       // Plugin Deinit
      0x0001,                 // 0.1
      sphinx_status_vars,
      NULL,
      "0.1",                  // string version
      MariaDB_PLUGIN_MATURITY_EXPERIMENTAL
  }
  maria_declare_plugin_end;
  #endif

  #endif // >50100
  2608. //
  2609. // $Id: ha_sphinx.cc 2058 2009-11-07 04:01:57Z shodan $
  2610. //