You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

559 lines
18 KiB

15 years ago
15 years ago
15 years ago
  1. /*
  2. * This program source code file is part of KICAD, a free EDA CAD application.
  3. *
  4. * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
  5. * Copyright (C) 2007-2024 Kicad Developers, see change_log.txt for contributors.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, you may find one here:
  19. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  20. * or you may search the http://www.gnu.org website for the version 2 license,
  21. * or you may write to the Free Software Foundation, Inc.,
  22. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  23. */
  24. #ifndef DSNLEXER_H_
  25. #define DSNLEXER_H_
  26. #include <kicommon.h>
  27. #include <cstdio>
  28. #include <hashtables.h>
  29. #include <string>
  30. #include <vector>
  31. #include <richio.h>
  32. #ifndef SWIG
  33. /**
  34. * Hold a keyword string and its unique integer token.
  35. */
  36. struct KICOMMON_API KEYWORD
  37. {
  38. const char* name; ///< unique keyword.
  39. int token; ///< a zero based index into an array of KEYWORDs
  40. };
  41. #endif // SWIG
  42. // something like this macro can be used to help initialize a KEYWORD table.
  43. // see SPECCTRA_DB::keywords[] as an example.
  44. //#define TOKDEF(x) { #x, T_##x }
  /**
   * Token codes for the syntax elements the DSN lexer knows how to recognize.
   *
   * Every value is negative. Whether a given parser accepts each of them is up to that parser.
   */
  enum DSN_SYNTAX_T
  {
      DSN_EOF          = -1,  ///< special case for end of file
      DSN_STRING       = -2,  ///< a quoted string, stripped of the quotes
      DSN_LEFT         = -3,  ///< left bracket, '('
      DSN_RIGHT        = -4,  ///< right bracket, ')'
      DSN_NUMBER       = -5,
      DSN_SYMBOL       = -6,
      DSN_DASH         = -7,
      DSN_QUOTE_DEF    = -8,
      DSN_STRING_QUOTE = -9,
      DSN_COMMENT      = -10,
      DSN_NONE         = -11
  };
  64. /**
  65. * Implement a lexical analyzer for the SPECCTRA DSN file format.
  66. *
  67. * It reads lexical tokens from the current #LINE_READER through the #NextTok() function.
  68. */
  69. class KICOMMON_API DSNLEXER
  70. {
  71. public:
  72. /**
  73. * Initialize a DSN lexer and prepares to read from aFile which is already open and has
  74. * \a aFilename.
  75. *
  76. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  77. * token table need not contain the lexer separators such as '(' ')', etc.
  78. * @param aKeywordCount is the count of tokens in aKeywordTable.
  79. * @param aFile is an open file, which will be closed when this is destructed.
  80. * @param aFileName is the name of the file
  81. */
  82. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  83. FILE* aFile, const wxString& aFileName );
  84. /**
  85. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  86. *
  87. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  88. * token table need not contain the lexer separators such as '(' ')', etc.
  89. * @param aKeywordCount is the count of tokens in aKeywordTable.
  90. * @param aSExpression is text to feed through a STRING_LINE_READER
  91. * @param aSource is a description of aSExpression, used for error reporting.
  92. */
  93. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  94. const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  95. /**
  96. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  97. *
  98. * Use this one without a keyword table with the DOM parser in ptree.h.
  99. *
  100. * @param aSExpression is text to feed through a #STRING_LINE_READER
  101. * @param aSource is a description of aSExpression, used for error reporting.
  102. */
  103. DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  104. /**
  105. * Initialize a DSN lexer and prepares to read from @a aLineReader which is already
  106. * open, and may be in use by other DSNLEXERs also.
  107. *
  108. * No ownership is taken of @a aLineReader. This enables it to be used by other DSNLEXERs.
  109. *
  110. * @param aKeywordTable is an array of #KEYWORDS holding \a aKeywordCount. This
  111. * token table need not contain the lexer separators such as '(' ')', etc.
  112. * @param aKeywordCount is the count of tokens in aKeywordTable.
  113. * @param aLineReader is any subclassed instance of LINE_READER, such as
  114. * #STRING_LINE_READER or #FILE_LINE_READER. No ownership is taken.
  115. */
  116. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  117. LINE_READER* aLineReader = nullptr );
  118. virtual ~DSNLEXER();
  119. /**
  120. * Reinit variables used during parsing, to ensure od states are not used in a new parsing
  121. * must be called before parsing a new file after parsing an old file to avoid
  122. * starting with some variables in a non initial state
  123. */
  124. void InitParserState();
  125. /**
  126. * Usable only for DSN lexers which share the same #LINE_READER.
  127. *
  128. * Synchronizes the pointers handling the data read by the #LINE_READER. Allows 2
  129. * #DNSLEXER objects to share the same current line, when switching from a #DNSLEXER
  130. * to another #DNSLEXER
  131. * @param aLexer the model.
  132. * @return true if the sync can be made ( at least the same line reader ).
  133. */
  134. bool SyncLineReaderWith( DSNLEXER& aLexer );
  135. /**
  136. * Change the behavior of this lexer into or out of "specctra mode".
  137. *
  138. * If specctra mode, then:
  139. * -#) stringDelimiter can be changed.
  140. * -#) KiCad quoting protocol is not in effect.
  141. * -#) space_in_quoted_tokens is functional else none of the above are true.
  142. *
  143. * The default mode is non-specctra mode, meaning:
  144. * -#) stringDelimiter cannot be changed.
  145. * -#) KiCad quoting protocol is in effect.
  146. * -#) space_in_quoted_tokens is not functional.
  147. */
  148. void SetSpecctraMode( bool aMode );
  149. /**
  150. * Manage a stack of LINE_READERs in order to handle nested file inclusion.
  151. *
  152. * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
  153. * it the current #LINE_READER with its own #GetSource(), line number and line text.
  154. * A grammar must be designed such that the "include" token (whatever its various names),
  155. * and any of its parameters are not followed by anything on that same line,
  156. * because PopReader always starts reading from a new line upon returning to
  157. * the original #LINE_READER.
  158. */
  159. void PushReader( LINE_READER* aLineReader );
  160. /**
  161. * Delete the top most #LINE_READER from an internal stack of LINE_READERs and
  162. * in the case of #FILE_LINE_READER this means the associated FILE is closed.
  163. *
  164. * The most recently used former #LINE_READER on the stack becomes the
  165. * current #LINE_READER and its previous position in its input stream and the
  166. * its latest line number should pertain. PopReader always starts reading
  167. * from a new line upon returning to the previous #LINE_READER. A pop is only
  168. * possible if there are at least 2 #LINE_READERs on the stack, since popping
  169. * the last one is not supported.
  170. *
  171. * @return the LINE_READER that was in use before the pop, or NULL
  172. * if there was not at least two readers on the stack and therefore the
  173. * pop failed.
  174. */
  175. LINE_READER* PopReader();
  176. /**
  177. * Return the next token found in the input file or DSN_EOF when reaching the end of
  178. * file.
  179. *
  180. * Users should wrap this function to return an enum to aid in grammar debugging while
  181. * running under a debugger, but leave this lower level function returning an int (so
  182. * the enum does not collide with another usage).
  183. *
  184. * @return the type of token found next.
  185. * @throw IO_ERROR only if the #LINE_READER throws it.
  186. */
  187. int NextTok();
  188. /**
  189. * Call #NextTok() and then verifies that the token read in satisfies #IsSymbol().
  190. *
  191. * @return the actual token read in.
  192. * @throw IO_ERROR if the next token does not satisfy IsSymbol().
  193. */
  194. int NeedSYMBOL();
  195. /**
  196. * Call #NextTok() and then verifies that the token read in satisfies bool IsSymbol() or
  197. * the next token is #DSN_NUMBER.
  198. *
  199. * @return the actual token read in.
  200. * @throw IO_ERROR if the next token does not satisfy the above test.
  201. */
  202. int NeedSYMBOLorNUMBER();
  203. /**
  204. * Call #NextTok() and then verifies that the token read is type #DSN_NUMBER.
  205. *
  206. * @return the actual token read in.
  207. * @throw IO_ERROR if the next token does not satisfy the above test.
  208. */
  209. int NeedNUMBER( const char* aExpectation );
  210. /**
  211. * Return whatever #NextTok() returned the last time it was called.
  212. */
  213. int CurTok() const
  214. {
  215. return curTok;
  216. }
  217. /**
  218. * Return whatever NextTok() returned the 2nd to last time it was called.
  219. */
  220. int PrevTok() const
  221. {
  222. return prevTok;
  223. }
  224. /**
  225. * Used to support "loose" matches (quoted tokens).
  226. */
  227. int GetCurStrAsToken() const
  228. {
  229. return findToken( curText );
  230. }
  231. /**
  232. * Change the string delimiter from the default " to some other character and return
  233. * the old value.
  234. *
  235. * @param aStringDelimiter The character in lowest 8 bits.
  236. * @return The old delimiter in the lowest 8 bits.
  237. */
  238. char SetStringDelimiter( char aStringDelimiter )
  239. {
  240. char old = stringDelimiter;
  241. if( specctraMode )
  242. stringDelimiter = aStringDelimiter;
  243. return old;
  244. }
  245. /**
  246. * Change the setting controlling whether a space in a quoted string isa terminator.
  247. *
  248. * @param val If true, means
  249. */
  250. bool SetSpaceInQuotedTokens( bool val )
  251. {
  252. bool old = space_in_quoted_tokens;
  253. if( specctraMode )
  254. space_in_quoted_tokens = val;
  255. return old;
  256. }
  257. /**
  258. * Change the handling of comments.
  259. *
  260. * If set true, comments are returned as single line strings with a terminating newline.
  261. * Otherwise they are consumed by the lexer and not returned.
  262. */
  263. bool SetCommentsAreTokens( bool val )
  264. {
  265. bool old = commentsAreTokens;
  266. commentsAreTokens = val;
  267. return old;
  268. }
  269. /**
  270. * Check the next sequence of tokens and reads them into a wxArrayString if they are
  271. * comments.
  272. *
  273. * Reading continues until a non-comment token is encountered, and such last read token
  274. * remains as #CurTok() and as #CurText(). No push back or "un get" mechanism is used
  275. * for this support. Upon return you simply avoid calling NextTok() for the next token,
  276. * but rather #CurTok().
  277. *
  278. * @return Heap allocated block of comments or NULL if none. The caller owns the
  279. * allocation and must delete if not NULL.
  280. */
  281. wxArrayString* ReadCommentLines();
  282. /**
  283. * Test a token to see if it is a symbol.
  284. *
  285. * This means it cannot be a special delimiter character such as #DSN_LEFT, #DSN_RIGHT,
  286. * #DSN_QUOTE, etc. It may however, coincidentally match a keyword and still be a symbol.
  287. */
  288. static bool IsSymbol( int aTok );
  289. /**
  290. * Throw an #IO_ERROR exception with an input file specific error message.
  291. *
  292. * @param aTok is the token/keyword type which was expected at the current input location.
  293. * @throw IO_ERROR with the location within the input file of the problem.
  294. */
  295. void Expecting( int aTok ) const;
  296. /**
  297. * Throw an #IO_ERROR exception with an input file specific error message.
  298. *
  299. * @param aTokenList is the token/keyword type which was expected at the
  300. * current input location, e.g. "pin|graphic|property".
  301. * @throw IO_ERROR with the location within the input file of the problem.
  302. */
  303. void Expecting( const char* aTokenList ) const;
  304. /**
  305. * Throw an #IO_ERROR exception with an input file specific error message.
  306. *
  307. * @param aTok is the token/keyword type which was not expected at the
  308. * current input location.
  309. * @throw IO_ERROR with the location within the input file of the problem.
  310. */
  311. void Unexpected( int aTok ) const;
  312. /**
  313. * Throw an #IO_ERROR exception with an input file specific error message.
  314. *
  315. * @param aToken is the token which was not expected at the current input location.
  316. * @throw IO_ERROR with the location within the input file of the problem.
  317. */
  318. void Unexpected( const char* aToken ) const;
  319. /**
  320. * Throw an #IO_ERROR exception with a message saying specifically that \a aTok
  321. * is a duplicate of one already seen in current context.
  322. *
  323. * @param aTok is the token/keyword type which was not expected at the current input
  324. * location.
  325. * @throw IO_ERROR with the location within the input file of the problem.
  326. */
  327. void Duplicate( int aTok );
  328. /**
  329. * Call #NextTok() and then verifies that the token read in is a #DSN_LEFT.
  330. *
  331. * @throw IO_ERROR if the next token is not a #DSN_LEFT
  332. */
  333. void NeedLEFT();
  334. /**
  335. * Call #NextTok() and then verifies that the token read in is a #DSN_RIGHT.
  336. *
  337. * @throw IO_ERROR if the next token is not a #DSN_RIGHT
  338. */
  339. void NeedRIGHT();
  340. /**
  341. * Return the C string representation of a #DSN_T value.
  342. */
  343. const char* GetTokenText( int aTok ) const;
  344. /**
  345. * Return a quote wrapped wxString representation of a token value.
  346. */
  347. wxString GetTokenString( int aTok ) const;
  348. static const char* Syntax( int aTok );
  349. /**
  350. * Return a pointer to the current token's text.
  351. */
  352. const char* CurText() const
  353. {
  354. return curText.c_str();
  355. }
  356. /**
  357. * Return a reference to current token in std::string form.
  358. */
  359. const std::string& CurStr() const
  360. {
  361. return curText;
  362. }
  363. /**
  364. * Return the current token text as a wxString, assuming that the input byte stream
  365. * is UTF8 encoded.
  366. */
  367. wxString FromUTF8() const
  368. {
  369. return wxString::FromUTF8( curText.c_str() );
  370. }
  371. /**
  372. * Return the current line number within my #LINE_READER.
  373. */
  374. int CurLineNumber() const
  375. {
  376. return reader->LineNumber();
  377. }
  378. /**
  379. * Return the current line of text from which the #CurText() would return its token.
  380. */
  381. const char* CurLine() const
  382. {
  383. return (const char*)(*reader);
  384. }
  385. /**
  386. * Return the current #LINE_READER source.
  387. *
  388. * @return source of the lines of text, e.g. a filename or "clipboard".
  389. */
  390. const wxString& CurSource() const
  391. {
  392. return reader->GetSource();
  393. }
  394. /**
  395. * Return the byte offset within the current line, using a 1 based index.
  396. *
  397. * @return a one based index into the current line.
  398. */
  399. int CurOffset() const
  400. {
  401. return curOffset + 1;
  402. }
  403. #ifndef SWIG
  404. protected:
  405. void init();
  406. int readLine()
  407. {
  408. if( reader )
  409. {
  410. reader->ReadLine();
  411. unsigned len = reader->Length();
  412. // start may have changed in ReadLine(), which can resize and
  413. // relocate reader's line buffer.
  414. start = reader->Line();
  415. next = start;
  416. limit = next + len;
  417. return len;
  418. }
  419. return 0;
  420. }
  421. /**
  422. * Take @a aToken string and looks up the string in the keywords table.
  423. *
  424. * @param aToken is a string to lookup in the keywords table.
  425. * @return with a value from the enum #DSN_T matching the keyword text,
  426. * or #DSN_SYMBOL if @a aToken is not in the keywords table.
  427. */
  428. int findToken( const std::string& aToken ) const;
  429. bool isStringTerminator( char cc ) const
  430. {
  431. if( !space_in_quoted_tokens && cc == ' ' )
  432. return true;
  433. if( cc == stringDelimiter )
  434. return true;
  435. return false;
  436. }
  437. /**
  438. * Parse the current token as an ASCII numeric string with possible leading
  439. * whitespace into a double precision floating point number.
  440. *
  441. * @throw IO_ERROR if an error occurs attempting to convert the current token.
  442. * @return The result of the parsed token.
  443. */
  444. double parseDouble();
  445. double parseDouble( const char* aExpected )
  446. {
  447. NeedNUMBER( aExpected );
  448. return parseDouble();
  449. }
  450. template <typename T>
  451. inline double parseDouble( T aToken )
  452. {
  453. return parseDouble( GetTokenText( aToken ) );
  454. }
  455. bool iOwnReaders; ///< on readerStack, should I delete them?
  456. const char* start;
  457. const char* next;
  458. const char* limit;
  459. char dummy[1]; ///< when there is no reader.
  460. typedef std::vector<LINE_READER*> READER_STACK;
  461. READER_STACK readerStack; ///< all the LINE_READERs by pointer.
  462. ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders
  463. LINE_READER* reader;
  464. bool specctraMode; ///< if true, then:
  465. ///< 1) stringDelimiter can be changed
  466. ///< 2) Kicad quoting protocol is not in effect
  467. ///< 3) space_in_quoted_tokens is functional
  468. ///< else not.
  469. char stringDelimiter;
  470. bool space_in_quoted_tokens; ///< blank spaces within quoted strings
  471. bool commentsAreTokens; ///< true if should return comments as tokens
  472. int prevTok; ///< curTok from previous NextTok() call.
  473. int curOffset; ///< offset within current line of the current token
  474. int curTok; ///< the current token obtained on last NextTok()
  475. std::string curText; ///< the text of the current token
  476. const KEYWORD* keywords; ///< table sorted by CMake for bsearch()
  477. unsigned keywordCount; ///< count of keywords table
  478. const KEYWORD_MAP* keywordsLookup; ///< fast, specialized "C string" hashtable
  479. #endif // SWIG
  480. };
  481. #endif // DSNLEXER_H_