You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

558 lines
18 KiB

15 years ago
15 years ago
15 years ago
  1. /*
  2. * This program source code file is part of KICAD, a free EDA CAD application.
  3. *
  4. * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
  5. * Copyright (C) 2007-2021 Kicad Developers, see change_log.txt for contributors.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, you may find one here:
  19. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  20. * or you may search the http://www.gnu.org website for the version 2 license,
  21. * or you may write to the Free Software Foundation, Inc.,
  22. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  23. */
  24. #ifndef DSNLEXER_H_
  25. #define DSNLEXER_H_
  26. #include <cstdio>
  27. #include <hashtables.h>
  28. #include <string>
  29. #include <vector>
  30. #include <richio.h>
  31. #ifndef SWIG
  /**
   * Hold a keyword string and its unique integer token.
   *
   * This is a plain aggregate so that a keyword table can be written as a list of
   * brace initializers, e.g. { "name", 0 }.
   */
  struct KEYWORD
  {
      const char* name;   ///< unique keyword.
      int         token;  ///< a zero based index into an array of KEYWORDs
  };
  40. #endif // SWIG
  // Something like this macro can be used to help initialize a KEYWORD table;
  // see SPECCTRA_DB::keywords[] for an example.
  //#define TOKDEF(x) { #x, T_##x }
  /**
   * List all the DSN lexer's tokens that are supported in lexing.
   *
   * It is up to the parser if it wants also to support them.
   *
   * Every value is negative, whereas KEYWORD::token is a zero based index, so these
   * syntax tokens cannot overlap with keyword tokens.
   */
  enum DSN_SYNTAX_T
  {
      DSN_NONE         = -11,
      DSN_COMMENT      = -10,
      DSN_STRING_QUOTE = -9,
      DSN_QUOTE_DEF    = -8,
      DSN_DASH         = -7,
      DSN_SYMBOL       = -6,   // text which is not in the keywords table
      DSN_NUMBER       = -5,   // a numeric token
      DSN_RIGHT        = -4,   // right bracket, ')'
      DSN_LEFT         = -3,   // left bracket, '('
      DSN_STRING       = -2,   // a quoted string, stripped of the quotes
      DSN_EOF          = -1    // special case for end of file
  };
  63. /**
  64. * Implement a lexical analyzer for the SPECCTRA DSN file format.
  65. *
  66. * It reads lexical tokens from the current #LINE_READER through the #NextTok() function.
  67. */
  68. class DSNLEXER
  69. {
  70. public:
  71. /**
  72. * Initialize a DSN lexer and prepares to read from aFile which is already open and has
  73. * \a aFilename.
  74. *
  75. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  76. * token table need not contain the lexer separators such as '(' ')', etc.
  77. * @param aKeywordCount is the count of tokens in aKeywordTable.
  78. * @param aFile is an open file, which will be closed when this is destructed.
  79. * @param aFileName is the name of the file
  80. */
  81. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  82. FILE* aFile, const wxString& aFileName );
  83. /**
  84. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  85. *
  86. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  87. * token table need not contain the lexer separators such as '(' ')', etc.
  88. * @param aKeywordCount is the count of tokens in aKeywordTable.
  89. * @param aSExpression is text to feed through a STRING_LINE_READER
  90. * @param aSource is a description of aSExpression, used for error reporting.
  91. */
  92. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  93. const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  94. /**
  95. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  96. *
  97. * Use this one without a keyword table with the DOM parser in ptree.h.
  98. *
  99. * @param aSExpression is text to feed through a #STRING_LINE_READER
  100. * @param aSource is a description of aSExpression, used for error reporting.
  101. */
  102. DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  103. /**
  104. * Initialize a DSN lexer and prepares to read from @a aLineReader which is already
  105. * open, and may be in use by other DSNLEXERs also.
  106. *
  107. * No ownership is taken of @a aLineReader. This enables it to be used by other DSNLEXERs.
  108. *
  109. * @param aKeywordTable is an array of #KEYWORDS holding \a aKeywordCount. This
  110. * token table need not contain the lexer separators such as '(' ')', etc.
  111. * @param aKeywordCount is the count of tokens in aKeywordTable.
  112. * @param aLineReader is any subclassed instance of LINE_READER, such as
  113. * #STRING_LINE_READER or #FILE_LINE_READER. No ownership is taken.
  114. */
  115. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap,
  116. LINE_READER* aLineReader = nullptr );
  117. virtual ~DSNLEXER();
  118. /**
  119. * Reinit variables used during parsing, to ensure od states are not used in a new parsing
  120. * must be called before parsing a new file after parsing an old file to avoid
  121. * starting with some variables in a non initial state
  122. */
  123. void InitParserState();
  124. /**
  125. * Usable only for DSN lexers which share the same #LINE_READER.
  126. *
  127. * Synchronizes the pointers handling the data read by the #LINE_READER. Allows 2
  128. * #DNSLEXER objects to share the same current line, when switching from a #DNSLEXER
  129. * to another #DNSLEXER
  130. * @param aLexer the model.
  131. * @return true if the sync can be made ( at least the same line reader ).
  132. */
  133. bool SyncLineReaderWith( DSNLEXER& aLexer );
  134. /**
  135. * Change the behavior of this lexer into or out of "specctra mode".
  136. *
  137. * If specctra mode, then:
  138. * -#) stringDelimiter can be changed.
  139. * -#) KiCad quoting protocol is not in effect.
  140. * -#) space_in_quoted_tokens is functional else none of the above are true.
  141. *
  142. * The default mode is non-specctra mode, meaning:
  143. * -#) stringDelimiter cannot be changed.
  144. * -#) KiCad quoting protocol is in effect.
  145. * -#) space_in_quoted_tokens is not functional.
  146. */
  147. void SetSpecctraMode( bool aMode );
  148. /**
  149. * Manage a stack of LINE_READERs in order to handle nested file inclusion.
  150. *
  151. * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
  152. * it the current #LINE_READER with its own #GetSource(), line number and line text.
  153. * A grammar must be designed such that the "include" token (whatever its various names),
  154. * and any of its parameters are not followed by anything on that same line,
  155. * because PopReader always starts reading from a new line upon returning to
  156. * the original #LINE_READER.
  157. */
  158. void PushReader( LINE_READER* aLineReader );
  159. /**
  160. * Delete the top most #LINE_READER from an internal stack of LINE_READERs and
  161. * in the case of #FILE_LINE_READER this means the associated FILE is closed.
  162. *
  163. * The most recently used former #LINE_READER on the stack becomes the
  164. * current #LINE_READER and its previous position in its input stream and the
  165. * its latest line number should pertain. PopReader always starts reading
  166. * from a new line upon returning to the previous #LINE_READER. A pop is only
  167. * possible if there are at least 2 #LINE_READERs on the stack, since popping
  168. * the last one is not supported.
  169. *
  170. * @return the LINE_READER that was in use before the pop, or NULL
  171. * if there was not at least two readers on the stack and therefore the
  172. * pop failed.
  173. */
  174. LINE_READER* PopReader();
  175. /**
  176. * Return the next token found in the input file or DSN_EOF when reaching the end of
  177. * file.
  178. *
  179. * Users should wrap this function to return an enum to aid in grammar debugging while
  180. * running under a debugger, but leave this lower level function returning an int (so
  181. * the enum does not collide with another usage).
  182. *
  183. * @return the type of token found next.
  184. * @throw IO_ERROR only if the #LINE_READER throws it.
  185. */
  186. int NextTok();
  187. /**
  188. * Call #NextTok() and then verifies that the token read in satisfies #IsSymbol().
  189. *
  190. * @return the actual token read in.
  191. * @throw IO_ERROR if the next token does not satisfy IsSymbol().
  192. */
  193. int NeedSYMBOL();
  194. /**
  195. * Call #NextTok() and then verifies that the token read in satisfies bool IsSymbol() or
  196. * the next token is #DSN_NUMBER.
  197. *
  198. * @return the actual token read in.
  199. * @throw IO_ERROR if the next token does not satisfy the above test.
  200. */
  201. int NeedSYMBOLorNUMBER();
  202. /**
  203. * Call #NextTok() and then verifies that the token read is type #DSN_NUMBER.
  204. *
  205. * @return the actual token read in.
  206. * @throw IO_ERROR if the next token does not satisfy the above test.
  207. */
  208. int NeedNUMBER( const char* aExpectation );
  209. /**
  210. * Return whatever #NextTok() returned the last time it was called.
  211. */
  212. int CurTok() const
  213. {
  214. return curTok;
  215. }
  216. /**
  217. * Return whatever NextTok() returned the 2nd to last time it was called.
  218. */
  219. int PrevTok() const
  220. {
  221. return prevTok;
  222. }
  223. /**
  224. * Used to support "loose" matches (quoted tokens).
  225. */
  226. int GetCurStrAsToken() const
  227. {
  228. return findToken( curText );
  229. }
  230. /**
  231. * Change the string delimiter from the default " to some other character and return
  232. * the old value.
  233. *
  234. * @param aStringDelimiter The character in lowest 8 bits.
  235. * @return The old delimiter in the lowest 8 bits.
  236. */
  237. char SetStringDelimiter( char aStringDelimiter )
  238. {
  239. int old = stringDelimiter;
  240. if( specctraMode )
  241. stringDelimiter = aStringDelimiter;
  242. return old;
  243. }
  244. /**
  245. * Change the setting controlling whether a space in a quoted string isa terminator.
  246. *
  247. * @param val If true, means
  248. */
  249. bool SetSpaceInQuotedTokens( bool val )
  250. {
  251. bool old = space_in_quoted_tokens;
  252. if( specctraMode )
  253. space_in_quoted_tokens = val;
  254. return old;
  255. }
  256. /**
  257. * Change the handling of comments.
  258. *
  259. * If set true, comments are returned as single line strings with a terminating newline.
  260. * Otherwise they are consumed by the lexer and not returned.
  261. */
  262. bool SetCommentsAreTokens( bool val )
  263. {
  264. bool old = commentsAreTokens;
  265. commentsAreTokens = val;
  266. return old;
  267. }
  268. /**
  269. * Check the next sequence of tokens and reads them into a wxArrayString if they are
  270. * comments.
  271. *
  272. * Reading continues until a non-comment token is encountered, and such last read token
  273. * remains as #CurTok() and as #CurText(). No push back or "un get" mechanism is used
  274. * for this support. Upon return you simply avoid calling NextTok() for the next token,
  275. * but rather #CurTok().
  276. *
  277. * @return Heap allocated block of comments or NULL if none. The caller owns the
  278. * allocation and must delete if not NULL.
  279. */
  280. wxArrayString* ReadCommentLines();
  281. /**
  282. * Test a token to see if it is a symbol.
  283. *
  284. * This means it cannot be a special delimiter character such as #DSN_LEFT, #DSN_RIGHT,
  285. * #DSN_QUOTE, etc. It may however, coincidentally match a keyword and still be a symbol.
  286. */
  287. static bool IsSymbol( int aTok );
  288. /**
  289. * Throw an #IO_ERROR exception with an input file specific error message.
  290. *
  291. * @param aTok is the token/keyword type which was expected at the current input location.
  292. * @throw IO_ERROR with the location within the input file of the problem.
  293. */
  294. void Expecting( int aTok ) const;
  295. /**
  296. * Throw an #IO_ERROR exception with an input file specific error message.
  297. *
  298. * @param aTokenList is the token/keyword type which was expected at the
  299. * current input location, e.g. "pin|graphic|property".
  300. * @throw IO_ERROR with the location within the input file of the problem.
  301. */
  302. void Expecting( const char* aTokenList ) const;
  303. /**
  304. * Throw an #IO_ERROR exception with an input file specific error message.
  305. *
  306. * @param aTok is the token/keyword type which was not expected at the
  307. * current input location.
  308. * @throw IO_ERROR with the location within the input file of the problem.
  309. */
  310. void Unexpected( int aTok ) const;
  311. /**
  312. * Throw an #IO_ERROR exception with an input file specific error message.
  313. *
  314. * @param aToken is the token which was not expected at the current input location.
  315. * @throw IO_ERROR with the location within the input file of the problem.
  316. */
  317. void Unexpected( const char* aToken ) const;
  318. /**
  319. * Throw an #IO_ERROR exception with a message saying specifically that \a aTok
  320. * is a duplicate of one already seen in current context.
  321. *
  322. * @param aTok is the token/keyword type which was not expected at the current input
  323. * location.
  324. * @throw IO_ERROR with the location within the input file of the problem.
  325. */
  326. void Duplicate( int aTok );
  327. /**
  328. * Call #NextTok() and then verifies that the token read in is a #DSN_LEFT.
  329. *
  330. * @throw IO_ERROR if the next token is not a #DSN_LEFT
  331. */
  332. void NeedLEFT();
  333. /**
  334. * Call #NextTok() and then verifies that the token read in is a #DSN_RIGHT.
  335. *
  336. * @throw IO_ERROR if the next token is not a #DSN_RIGHT
  337. */
  338. void NeedRIGHT();
  339. /**
  340. * Return the C string representation of a #DSN_T value.
  341. */
  342. const char* GetTokenText( int aTok ) const;
  343. /**
  344. * Return a quote wrapped wxString representation of a token value.
  345. */
  346. wxString GetTokenString( int aTok ) const;
  347. static const char* Syntax( int aTok );
  348. /**
  349. * Return a pointer to the current token's text.
  350. */
  351. const char* CurText() const
  352. {
  353. return curText.c_str();
  354. }
  355. /**
  356. * Return a reference to current token in std::string form.
  357. */
  358. const std::string& CurStr() const
  359. {
  360. return curText;
  361. }
  362. /**
  363. * Return the current token text as a wxString, assuming that the input byte stream
  364. * is UTF8 encoded.
  365. */
  366. wxString FromUTF8() const
  367. {
  368. return wxString::FromUTF8( curText.c_str() );
  369. }
  370. /**
  371. * Return the current line number within my #LINE_READER.
  372. */
  373. int CurLineNumber() const
  374. {
  375. return reader->LineNumber();
  376. }
  377. /**
  378. * Return the current line of text from which the #CurText() would return its token.
  379. */
  380. const char* CurLine() const
  381. {
  382. return (const char*)(*reader);
  383. }
  384. /**
  385. * Return the current #LINE_READER source.
  386. *
  387. * @return source of the lines of text, e.g. a filename or "clipboard".
  388. */
  389. const wxString& CurSource() const
  390. {
  391. return reader->GetSource();
  392. }
  393. /**
  394. * Return the byte offset within the current line, using a 1 based index.
  395. *
  396. * @return a one based index into the current line.
  397. */
  398. int CurOffset() const
  399. {
  400. return curOffset + 1;
  401. }
  402. #ifndef SWIG
  403. protected:
  404. void init();
  405. int readLine()
  406. {
  407. if( reader )
  408. {
  409. reader->ReadLine();
  410. unsigned len = reader->Length();
  411. // start may have changed in ReadLine(), which can resize and
  412. // relocate reader's line buffer.
  413. start = reader->Line();
  414. next = start;
  415. limit = next + len;
  416. return len;
  417. }
  418. return 0;
  419. }
  420. /**
  421. * Take @a aToken string and looks up the string in the keywords table.
  422. *
  423. * @param aToken is a string to lookup in the keywords table.
  424. * @return with a value from the enum #DSN_T matching the keyword text,
  425. * or #DSN_SYMBOL if @a aToken is not in the keywords table.
  426. */
  427. int findToken( const std::string& aToken ) const;
  428. bool isStringTerminator( char cc ) const
  429. {
  430. if( !space_in_quoted_tokens && cc == ' ' )
  431. return true;
  432. if( cc == stringDelimiter )
  433. return true;
  434. return false;
  435. }
  436. /**
  437. * Parse the current token as an ASCII numeric string with possible leading
  438. * whitespace into a double precision floating point number.
  439. *
  440. * @throw IO_ERROR if an error occurs attempting to convert the current token.
  441. * @return The result of the parsed token.
  442. */
  443. double parseDouble();
  444. double parseDouble( const char* aExpected )
  445. {
  446. NeedNUMBER( aExpected );
  447. return parseDouble();
  448. }
  449. template <typename T>
  450. inline double parseDouble( T aToken )
  451. {
  452. return parseDouble( GetTokenText( aToken ) );
  453. }
  454. bool iOwnReaders; ///< on readerStack, should I delete them?
  455. const char* start;
  456. const char* next;
  457. const char* limit;
  458. char dummy[1]; ///< when there is no reader.
  459. typedef std::vector<LINE_READER*> READER_STACK;
  460. READER_STACK readerStack; ///< all the LINE_READERs by pointer.
  461. ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders
  462. LINE_READER* reader;
  463. bool specctraMode; ///< if true, then:
  464. ///< 1) stringDelimiter can be changed
  465. ///< 2) Kicad quoting protocol is not in effect
  466. ///< 3) space_in_quoted_tokens is functional
  467. ///< else not.
  468. char stringDelimiter;
  469. bool space_in_quoted_tokens; ///< blank spaces within quoted strings
  470. bool commentsAreTokens; ///< true if should return comments as tokens
  471. int prevTok; ///< curTok from previous NextTok() call.
  472. int curOffset; ///< offset within current line of the current token
  473. int curTok; ///< the current token obtained on last NextTok()
  474. std::string curText; ///< the text of the current token
  475. const KEYWORD* keywords; ///< table sorted by CMake for bsearch()
  476. unsigned keywordCount; ///< count of keywords table
  477. const KEYWORD_MAP* keywordsLookup; ///< fast, specialized "C string" hashtable
  478. #endif // SWIG
  479. };
  480. #endif // DSNLEXER_H_