You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

567 lines
19 KiB

15 years ago
15 years ago
15 years ago
  1. /*
  2. * This program source code file is part of KICAD, a free EDA CAD application.
  3. *
  4. * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
  5. * Copyright The KiCad Developers, see AUTHORS.txt for contributors.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, you may find one here:
  19. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  20. * or you may search the http://www.gnu.org website for the version 2 license,
  21. * or you may write to the Free Software Foundation, Inc.,
  22. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  23. */
  24. #ifndef DSNLEXER_H_
  25. #define DSNLEXER_H_
  26. #include <kicommon.h>
  27. #include <cstdio>
  28. #include <hashtables.h>
  29. #include <string>
  30. #include <vector>
  31. #include <richio.h>
  32. #ifndef SWIG
  33. /**
  34. * Hold a keyword string and its unique integer token.
  35. */
  36. struct KICOMMON_API KEYWORD
  37. {
  38. const char* name; ///< unique keyword.
  39. int token; ///< a zero based index into an array of KEYWORDs
  40. };
  41. #endif // SWIG
  // A macro such as the one below can be used to help initialize a KEYWORD table;
  // see SPECCTRA_DB::keywords[] for an example.
  //#define TOKDEF(x) { #x, T_##x }
  /**
   * Enumerate the syntax-level tokens that the DSN lexer is able to produce.
   *
   * Every value is negative.  Whether a given parser actually accepts each of these
   * tokens is up to that parser.
   */
  enum DSN_SYNTAX_T
  {
      DSN_NONE         = -12,
      DSN_BAR          = -11,     ///< Also called pipe: '|'.
      DSN_COMMENT      = -10,
      DSN_STRING_QUOTE = -9,
      DSN_QUOTE_DEF    = -8,
      DSN_DASH         = -7,
      DSN_SYMBOL       = -6,
      DSN_NUMBER       = -5,
      DSN_RIGHT        = -4,      ///< Right bracket: ')'.
      DSN_LEFT         = -3,      ///< Left bracket: '('.
      DSN_STRING       = -2,      ///< A quoted string, stripped of the quotes.
      DSN_EOF          = -1       ///< Special case for end of file.
  };
  65. /**
  66. * Implement a lexical analyzer for the SPECCTRA DSN file format.
  67. *
  68. * It reads lexical tokens from the current #LINE_READER through the #NextTok() function.
  69. */
  70. class KICOMMON_API DSNLEXER
  71. {
  72. public:
  73. /**
  74. * Initialize a DSN lexer and prepares to read from aFile which is already open and has
  75. * \a aFilename.
  76. *
  77. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  78. * token table need not contain the lexer separators such as '(' ')', etc.
  79. * @param aKeywordCount is the count of tokens in aKeywordTable.
  80. * @param aFile is an open file, which will be closed when this is destructed.
  81. * @param aFileName is the name of the file
  82. */
  83. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  84. FILE* aFile, const wxString& aFileName );
  85. /**
  86. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  87. *
  88. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  89. * token table need not contain the lexer separators such as '(' ')', etc.
  90. * @param aKeywordCount is the count of tokens in aKeywordTable.
  91. * @param aSExpression is text to feed through a STRING_LINE_READER
  92. * @param aSource is a description of aSExpression, used for error reporting.
  93. */
  94. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  95. const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  96. /**
  97. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  98. *
  99. * Use this one without a keyword table with the DOM parser in ptree.h.
  100. *
  101. * @param aSExpression is text to feed through a #STRING_LINE_READER
  102. * @param aSource is a description of aSExpression, used for error reporting.
  103. */
  104. DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  105. /**
  106. * Initialize a DSN lexer and prepares to read from @a aLineReader which is already
  107. * open, and may be in use by other DSNLEXERs also.
  108. *
  109. * No ownership is taken of @a aLineReader. This enables it to be used by other DSNLEXERs.
  110. *
  111. * @param aKeywordTable is an array of #KEYWORDS holding \a aKeywordCount. This
  112. * token table need not contain the lexer separators such as '(' ')', etc.
  113. * @param aKeywordCount is the count of tokens in aKeywordTable.
  114. * @param aLineReader is any subclassed instance of LINE_READER, such as
  115. * #STRING_LINE_READER or #FILE_LINE_READER. No ownership is taken.
  116. */
  117. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  118. LINE_READER* aLineReader = nullptr );
  119. virtual ~DSNLEXER();
  120. /**
  121. * Reinit variables used during parsing, to ensure od states are not used in a new parsing
  122. * must be called before parsing a new file after parsing an old file to avoid
  123. * starting with some variables in a non initial state
  124. */
  125. void InitParserState();
  126. /**
  127. * Usable only for DSN lexers which share the same #LINE_READER.
  128. *
  129. * Synchronizes the pointers handling the data read by the #LINE_READER. Allows 2
  130. * #DNSLEXER objects to share the same current line, when switching from a #DNSLEXER
  131. * to another #DNSLEXER
  132. * @param aLexer the model.
  133. * @return true if the sync can be made ( at least the same line reader ).
  134. */
  135. bool SyncLineReaderWith( DSNLEXER& aLexer );
  136. /**
  137. * Change the behavior of this lexer into or out of "specctra mode".
  138. *
  139. * If specctra mode, then:
  140. * -#) stringDelimiter can be changed.
  141. * -#) KiCad quoting protocol is not in effect.
  142. * -#) space_in_quoted_tokens is functional else none of the above are true.
  143. *
  144. * The default mode is non-specctra mode, meaning:
  145. * -#) stringDelimiter cannot be changed.
  146. * -#) KiCad quoting protocol is in effect.
  147. * -#) space_in_quoted_tokens is not functional.
  148. */
  149. void SetSpecctraMode( bool aMode );
  150. /**
  151. * Manage a stack of LINE_READERs in order to handle nested file inclusion.
  152. *
  153. * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
  154. * it the current #LINE_READER with its own #GetSource(), line number and line text.
  155. * A grammar must be designed such that the "include" token (whatever its various names),
  156. * and any of its parameters are not followed by anything on that same line,
  157. * because PopReader always starts reading from a new line upon returning to
  158. * the original #LINE_READER.
  159. */
  160. void PushReader( LINE_READER* aLineReader );
  161. /**
  162. * Delete the top most #LINE_READER from an internal stack of LINE_READERs and
  163. * in the case of #FILE_LINE_READER this means the associated FILE is closed.
  164. *
  165. * The most recently used former #LINE_READER on the stack becomes the
  166. * current #LINE_READER and its previous position in its input stream and the
  167. * its latest line number should pertain. PopReader always starts reading
  168. * from a new line upon returning to the previous #LINE_READER. A pop is only
  169. * possible if there are at least 2 #LINE_READERs on the stack, since popping
  170. * the last one is not supported.
  171. *
  172. * @return the LINE_READER that was in use before the pop, or NULL
  173. * if there was not at least two readers on the stack and therefore the
  174. * pop failed.
  175. */
  176. LINE_READER* PopReader();
  177. /**
  178. * Return the next token found in the input file or DSN_EOF when reaching the end of
  179. * file.
  180. *
  181. * Users should wrap this function to return an enum to aid in grammar debugging while
  182. * running under a debugger, but leave this lower level function returning an int (so
  183. * the enum does not collide with another usage).
  184. *
  185. * @return the type of token found next.
  186. * @throw IO_ERROR only if the #LINE_READER throws it.
  187. */
  188. int NextTok();
  189. /**
  190. * Call #NextTok() and then verifies that the token read in satisfies #IsSymbol().
  191. *
  192. * @return the actual token read in.
  193. * @throw IO_ERROR if the next token does not satisfy IsSymbol().
  194. */
  195. int NeedSYMBOL();
  196. /**
  197. * Call #NextTok() and then verifies that the token read in satisfies bool IsSymbol() or
  198. * the next token is #DSN_NUMBER.
  199. *
  200. * @return the actual token read in.
  201. * @throw IO_ERROR if the next token does not satisfy the above test.
  202. */
  203. int NeedSYMBOLorNUMBER();
  204. /**
  205. * Call #NextTok() and then verifies that the token read is type #DSN_NUMBER.
  206. *
  207. * @return the actual token read in.
  208. * @throw IO_ERROR if the next token does not satisfy the above test.
  209. */
  210. int NeedNUMBER( const char* aExpectation );
  211. /**
  212. * Return whatever #NextTok() returned the last time it was called.
  213. */
  214. int CurTok() const
  215. {
  216. return curTok;
  217. }
  218. /**
  219. * Return whatever NextTok() returned the 2nd to last time it was called.
  220. */
  221. int PrevTok() const
  222. {
  223. return prevTok;
  224. }
  225. /**
  226. * Used to support "loose" matches (quoted tokens).
  227. */
  228. int GetCurStrAsToken() const
  229. {
  230. return findToken( curText );
  231. }
  232. /**
  233. * Change the string delimiter from the default " to some other character and return
  234. * the old value.
  235. *
  236. * @param aStringDelimiter The character in lowest 8 bits.
  237. * @return The old delimiter in the lowest 8 bits.
  238. */
  239. char SetStringDelimiter( char aStringDelimiter )
  240. {
  241. char old = stringDelimiter;
  242. if( specctraMode )
  243. stringDelimiter = aStringDelimiter;
  244. return old;
  245. }
  246. /**
  247. * Change the setting controlling whether a space in a quoted string isa terminator.
  248. *
  249. * @param val If true, means
  250. */
  251. bool SetSpaceInQuotedTokens( bool val )
  252. {
  253. bool old = space_in_quoted_tokens;
  254. if( specctraMode )
  255. space_in_quoted_tokens = val;
  256. return old;
  257. }
  258. /**
  259. * Change the handling of comments.
  260. *
  261. * If set true, comments are returned as single line strings with a terminating newline.
  262. * Otherwise they are consumed by the lexer and not returned.
  263. */
  264. bool SetCommentsAreTokens( bool val )
  265. {
  266. bool old = commentsAreTokens;
  267. commentsAreTokens = val;
  268. return old;
  269. }
  270. /**
  271. * Check the next sequence of tokens and reads them into a wxArrayString if they are
  272. * comments.
  273. *
  274. * Reading continues until a non-comment token is encountered, and such last read token
  275. * remains as #CurTok() and as #CurText(). No push back or "un get" mechanism is used
  276. * for this support. Upon return you simply avoid calling NextTok() for the next token,
  277. * but rather #CurTok().
  278. *
  279. * @return Heap allocated block of comments or NULL if none. The caller owns the
  280. * allocation and must delete if not NULL.
  281. */
  282. wxArrayString* ReadCommentLines();
  283. /**
  284. * Test a token to see if it is a symbol.
  285. *
  286. * This means it cannot be a special delimiter character such as #DSN_LEFT, #DSN_RIGHT,
  287. * #DSN_QUOTE, etc. It may however, coincidentally match a keyword and still be a symbol.
  288. */
  289. static bool IsSymbol( int aTok );
  290. /**
  291. * Throw an #IO_ERROR exception with an input file specific error message.
  292. *
  293. * @param aTok is the token/keyword type which was expected at the current input location.
  294. * @throw IO_ERROR with the location within the input file of the problem.
  295. */
  296. void Expecting( int aTok ) const;
  297. /**
  298. * Throw an #IO_ERROR exception with an input file specific error message.
  299. *
  300. * @param aTokenList is the token/keyword type which was expected at the
  301. * current input location, e.g. "pin|graphic|property".
  302. * @throw IO_ERROR with the location within the input file of the problem.
  303. */
  304. void Expecting( const char* aTokenList ) const;
  305. /**
  306. * Throw an #IO_ERROR exception with an input file specific error message.
  307. *
  308. * @param aTok is the token/keyword type which was not expected at the
  309. * current input location.
  310. * @throw IO_ERROR with the location within the input file of the problem.
  311. */
  312. void Unexpected( int aTok ) const;
  313. /**
  314. * Throw an #IO_ERROR exception with an input file specific error message.
  315. *
  316. * @param aToken is the token which was not expected at the current input location.
  317. * @throw IO_ERROR with the location within the input file of the problem.
  318. */
  319. void Unexpected( const char* aToken ) const;
  320. /**
  321. * Throw an #IO_ERROR exception with a message saying specifically that \a aTok
  322. * is a duplicate of one already seen in current context.
  323. *
  324. * @param aTok is the token/keyword type which was not expected at the current input
  325. * location.
  326. * @throw IO_ERROR with the location within the input file of the problem.
  327. */
  328. void Duplicate( int aTok );
  329. /**
  330. * Call #NextTok() and then verifies that the token read in is a #DSN_LEFT.
  331. *
  332. * @throw IO_ERROR if the next token is not a #DSN_LEFT
  333. */
  334. void NeedLEFT();
  335. /**
  336. * Call #NextTok() and then verifies that the token read in is a #DSN_RIGHT.
  337. *
  338. * @throw IO_ERROR if the next token is not a #DSN_RIGHT
  339. */
  340. void NeedRIGHT();
  341. /**
  342. * Call #NextTok() and then verifies that the token read in is a #DSN_BAR.
  343. *
  344. * @throw IO_ERROR if the next token is not a #DSN_BAR
  345. */
  346. void NeedBAR();
  347. /**
  348. * Return the C string representation of a #DSN_T value.
  349. */
  350. const char* GetTokenText( int aTok ) const;
  351. /**
  352. * Return a quote wrapped wxString representation of a token value.
  353. */
  354. wxString GetTokenString( int aTok ) const;
  355. static const char* Syntax( int aTok );
  356. /**
  357. * Return a pointer to the current token's text.
  358. */
  359. const char* CurText() const
  360. {
  361. return curText.c_str();
  362. }
  363. /**
  364. * Return a reference to current token in std::string form.
  365. */
  366. const std::string& CurStr() const
  367. {
  368. return curText;
  369. }
  370. /**
  371. * Return the current token text as a wxString, assuming that the input byte stream
  372. * is UTF8 encoded.
  373. */
  374. wxString FromUTF8() const
  375. {
  376. return wxString::FromUTF8( curText.c_str() );
  377. }
  378. /**
  379. * Return the current line number within my #LINE_READER.
  380. */
  381. int CurLineNumber() const
  382. {
  383. return reader->LineNumber();
  384. }
  385. /**
  386. * Return the current line of text from which the #CurText() would return its token.
  387. */
  388. const char* CurLine() const
  389. {
  390. return (const char*)(*reader);
  391. }
  392. /**
  393. * Return the current #LINE_READER source.
  394. *
  395. * @return source of the lines of text, e.g. a filename or "clipboard".
  396. */
  397. const wxString& CurSource() const
  398. {
  399. return reader->GetSource();
  400. }
  401. /**
  402. * Return the byte offset within the current line, using a 1 based index.
  403. *
  404. * @return a one based index into the current line.
  405. */
  406. int CurOffset() const
  407. {
  408. return curOffset + 1;
  409. }
  410. #ifndef SWIG
  411. protected:
  412. void init();
  413. int readLine()
  414. {
  415. if( reader )
  416. {
  417. reader->ReadLine();
  418. unsigned len = reader->Length();
  419. // start may have changed in ReadLine(), which can resize and
  420. // relocate reader's line buffer.
  421. start = reader->Line();
  422. next = start;
  423. limit = next + len;
  424. return len;
  425. }
  426. return 0;
  427. }
  428. /**
  429. * Take @a aToken string and looks up the string in the keywords table.
  430. *
  431. * @param aToken is a string to lookup in the keywords table.
  432. * @return with a value from the enum #DSN_T matching the keyword text,
  433. * or #DSN_SYMBOL if @a aToken is not in the keywords table.
  434. */
  435. int findToken( const std::string& aToken ) const;
  436. bool isStringTerminator( char cc ) const
  437. {
  438. if( !space_in_quoted_tokens && cc == ' ' )
  439. return true;
  440. if( cc == stringDelimiter )
  441. return true;
  442. return false;
  443. }
  444. /**
  445. * Parse the current token as an ASCII numeric string with possible leading
  446. * whitespace into a double precision floating point number.
  447. *
  448. * @throw IO_ERROR if an error occurs attempting to convert the current token.
  449. * @return The result of the parsed token.
  450. */
  451. double parseDouble();
  452. double parseDouble( const char* aExpected )
  453. {
  454. NeedNUMBER( aExpected );
  455. return parseDouble();
  456. }
  457. template <typename T>
  458. inline double parseDouble( T aToken )
  459. {
  460. return parseDouble( GetTokenText( aToken ) );
  461. }
  462. bool iOwnReaders; ///< On readerStack, should I delete them?
  463. const char* start;
  464. const char* next;
  465. const char* limit;
  466. char dummy[1]; ///< When there is no reader.
  467. typedef std::vector<LINE_READER*> READER_STACK;
  468. READER_STACK readerStack; ///< all the LINE_READERs by pointer.
  469. /// No ownership. ownership is via readerStack, maybe, if #iOwnReaders.
  470. LINE_READER* reader;
  471. bool specctraMode; ///< if true, then:
  472. ///< 1) stringDelimiter can be changed
  473. ///< 2) Kicad quoting protocol is not in effect
  474. ///< 3) space_in_quoted_tokens is functional
  475. ///< else not.
  476. char stringDelimiter;
  477. bool space_in_quoted_tokens; ///< Blank spaces within quoted strings.
  478. bool commentsAreTokens; ///< True if should return comments as tokens.
  479. int prevTok; ///< #curTok from previous NextTok() call.
  480. int curOffset; ///< Offset within current line of the current token
  481. int curTok; ///< The current token obtained on last NextTok().
  482. std::string curText; ///< The text of the current token.
  483. const KEYWORD* keywords; ///< Table sorted by CMake for bsearch().
  484. unsigned keywordCount; ///< Count of keywords table.
  485. const KEYWORD_MAP* keywordsLookup; ///< Fast, specialized "C string" hashtable.
  486. #endif // SWIG
  487. };
  488. #endif // DSNLEXER_H_