You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

530 lines
18 KiB

15 years ago
15 years ago
15 years ago
  1. /*
  2. * This program source code file is part of KICAD, a free EDA CAD application.
  3. *
  4. * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
  5. * Copyright (C) 2007-2020 Kicad Developers, see change_log.txt for contributors.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, you may find one here:
  19. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  20. * or you may search the http://www.gnu.org website for the version 2 license,
  21. * or you may write to the Free Software Foundation, Inc.,
  22. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  23. */
  24. #ifndef DSNLEXER_H_
  25. #define DSNLEXER_H_
  26. #include <cstdio>
  27. #include <hashtables.h>
  28. #include <string>
  29. #include <vector>
  30. #include <richio.h>
  31. #ifndef SWIG
  /**
   * Hold a keyword string and its unique integer token.
   *
   * Instances are plain aggregates meant to be laid out in a static table that is
   * handed to the lexer together with the table's element count.
   */
  struct KEYWORD
  {
      const char* name;     ///< Unique keyword text.
      int         token;    ///< A zero based index into an array of KEYWORDs.
  };
  40. #endif // SWIG
  // Something like this macro can be used to help initialize a KEYWORD table;
  // see SPECCTRA_DB::keywords[] as an example.
  //#define TOKDEF(x) { #x, T_##x }
  /**
   * List all the DSN lexer's tokens that are supported in lexing.
   *
   * It is up to the parser whether it also wants to support them.
   *
   * Every enumerator is negative; presumably this keeps the syntax tokens apart from
   * keyword tokens, which are zero based indices into a KEYWORD table.
   */
  enum DSN_SYNTAX_T
  {
      DSN_NONE         = -11,
      DSN_COMMENT      = -10,
      DSN_STRING_QUOTE = -9,
      DSN_QUOTE_DEF    = -8,
      DSN_DASH         = -7,
      DSN_SYMBOL       = -6,
      DSN_NUMBER       = -5,
      DSN_RIGHT        = -4,    // right bracket, ')'
      DSN_LEFT         = -3,    // left bracket, '('
      DSN_STRING       = -2,    // a quoted string, stripped of the quotes
      DSN_EOF          = -1     // special case for end of file
  };
  63. /**
  64. * Implement a lexical analyzer for the SPECCTRA DSN file format.
  65. *
  66. * It reads lexical tokens from the current #LINE_READER through the #NextTok() function.
  67. */
  68. class DSNLEXER
  69. {
  70. public:
  71. /**
  72. * Initialize a DSN lexer and prepares to read from aFile which is already open and has
  73. * \a aFilename.
  74. *
  75. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  76. * token table need not contain the lexer separators such as '(' ')', etc.
  77. * @param aKeywordCount is the count of tokens in aKeywordTable.
  78. * @param aFile is an open file, which will be closed when this is destructed.
  79. * @param aFileName is the name of the file
  80. */
  81. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
  82. FILE* aFile, const wxString& aFileName );
  83. /**
  84. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  85. *
  86. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  87. * token table need not contain the lexer separators such as '(' ')', etc.
  88. * @param aKeywordCount is the count of tokens in aKeywordTable.
  89. * @param aSExpression is text to feed through a STRING_LINE_READER
  90. * @param aSource is a description of aSExpression, used for error reporting.
  91. */
  92. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
  93. const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  94. /**
  95. * Initialize a DSN lexer and prepares to read from @a aSExpression.
  96. *
  97. * Use this one without a keyword table with the DOM parser in ptree.h.
  98. *
  99. * @param aSExpression is text to feed through a #STRING_LINE_READER
  100. * @param aSource is a description of aSExpression, used for error reporting.
  101. */
  102. DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  103. /**
  104. * Initialize a DSN lexer and prepares to read from @a aLineReader which is already
  105. * open, and may be in use by other DSNLEXERs also.
  106. *
  107. * No ownership is taken of @a aLineReader. This enables it to be used by other DSNLEXERs.
  108. *
  109. * @param aKeywordTable is an array of #KEYWORDS holding \a aKeywordCount. This
  110. * token table need not contain the lexer separators such as '(' ')', etc.
  111. * @param aKeywordCount is the count of tokens in aKeywordTable.
  112. * @param aLineReader is any subclassed instance of LINE_READER, such as
  113. * #STRING_LINE_READER or #FILE_LINE_READER. No ownership is taken.
  114. */
  115. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
  116. LINE_READER* aLineReader = NULL );
  117. virtual ~DSNLEXER();
  118. /**
  119. * Usable only for DSN lexers which share the same #LINE_READER.
  120. *
  121. * Synchronizes the pointers handling the data read by the #LINE_READER. Allows 2
  122. * #DNSLEXER objects to share the same current line, when switching from a #DNSLEXER
  123. * to another #DNSLEXER
  124. * @param aLexer the model.
  125. * @return true if the sync can be made ( at least the same line reader ).
  126. */
  127. bool SyncLineReaderWith( DSNLEXER& aLexer );
  128. /**
  129. * Change the behavior of this lexer into or out of "specctra mode".
  130. *
  131. * If specctra mode, then:
  132. * -#) stringDelimiter can be changed.
  133. * -#) KiCad quoting protocol is not in effect.
  134. * -#) space_in_quoted_tokens is functional else none of the above are true.
  135. *
  136. * The default mode is non-specctra mode, meaning:
  137. * -#) stringDelimiter cannot be changed.
  138. * -#) KiCad quoting protocol is in effect.
  139. * -#) space_in_quoted_tokens is not functional.
  140. */
  141. void SetSpecctraMode( bool aMode );
  142. /**
  143. * Manage a stack of LINE_READERs in order to handle nested file inclusion.
  144. *
  145. * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
  146. * it the current #LINE_READER with its own #GetSource(), line number and line text.
  147. * A grammar must be designed such that the "include" token (whatever its various names),
  148. * and any of its parameters are not followed by anything on that same line,
  149. * because PopReader always starts reading from a new line upon returning to
  150. * the original #LINE_READER.
  151. */
  152. void PushReader( LINE_READER* aLineReader );
  153. /**
  154. * Delete the top most #LINE_READER from an internal stack of LINE_READERs and
  155. * in the case of #FILE_LINE_READER this means the associated FILE is closed.
  156. *
  157. * The most recently used former #LINE_READER on the stack becomes the
  158. * current #LINE_READER and its previous position in its input stream and the
  159. * its latest line number should pertain. PopReader always starts reading
  160. * from a new line upon returning to the previous #LINE_READER. A pop is only
  161. * possible if there are at least 2 #LINE_READERs on the stack, since popping
  162. * the last one is not supported.
  163. *
  164. * @return the LINE_READER that was in use before the pop, or NULL
  165. * if there was not at least two readers on the stack and therefore the
  166. * pop failed.
  167. */
  168. LINE_READER* PopReader();
  169. /**
  170. * Return the next token found in the input file or DSN_EOF when reaching the end of
  171. * file.
  172. *
  173. * Users should wrap this function to return an enum to aid in grammar debugging while
  174. * running under a debugger, but leave this lower level function returning an int (so
  175. * the enum does not collide with another usage).
  176. *
  177. * @return the type of token found next.
  178. * @throw IO_ERROR only if the #LINE_READER throws it.
  179. */
  180. int NextTok();
  181. /**
  182. * Call #NextTok() and then verifies that the token read in satisfies #IsSymbol().
  183. *
  184. * @return the actual token read in.
  185. * @throw IO_ERROR if the next token does not satisfy IsSymbol().
  186. */
  187. int NeedSYMBOL();
  188. /**
  189. * Call #NextTok() and then verifies that the token read in satisfies bool IsSymbol() or
  190. * the next token is #DSN_NUMBER.
  191. *
  192. * @return the actual token read in.
  193. * @throw IO_ERROR if the next token does not satisfy the above test.
  194. */
  195. int NeedSYMBOLorNUMBER();
  196. /**
  197. * Call #NextTok() and then verifies that the token read is type #DSN_NUMBER.
  198. *
  199. * @return the actual token read in.
  200. * @throw IO_ERROR if the next token does not satisfy the above test.
  201. */
  202. int NeedNUMBER( const char* aExpectation );
  203. /**
  204. * Return whatever #NextTok() returned the last time it was called.
  205. */
  206. int CurTok() const
  207. {
  208. return curTok;
  209. }
  210. /**
  211. * Return whatever NextTok() returned the 2nd to last time it was called.
  212. */
  213. int PrevTok() const
  214. {
  215. return prevTok;
  216. }
  217. /**
  218. * Used to support "loose" matches (quoted tokens).
  219. */
  220. int GetCurStrAsToken() const
  221. {
  222. return findToken( curText );
  223. }
  224. /**
  225. * Change the string delimiter from the default " to some other character and return
  226. * the old value.
  227. *
  228. * @param aStringDelimiter The character in lowest 8 bits.
  229. * @return The old delimiter in the lowest 8 bits.
  230. */
  231. char SetStringDelimiter( char aStringDelimiter )
  232. {
  233. int old = stringDelimiter;
  234. if( specctraMode )
  235. stringDelimiter = aStringDelimiter;
  236. return old;
  237. }
  238. /**
  239. * Change the setting controlling whether a space in a quoted string isa terminator.
  240. *
  241. * @param val If true, means
  242. */
  243. bool SetSpaceInQuotedTokens( bool val )
  244. {
  245. bool old = space_in_quoted_tokens;
  246. if( specctraMode )
  247. space_in_quoted_tokens = val;
  248. return old;
  249. }
  250. /**
  251. * Change the handling of comments.
  252. *
  253. * If set true, comments are returned as single line strings with a terminating newline.
  254. * Otherwise they are consumed by the lexer and not returned.
  255. */
  256. bool SetCommentsAreTokens( bool val )
  257. {
  258. bool old = commentsAreTokens;
  259. commentsAreTokens = val;
  260. return old;
  261. }
  262. /**
  263. * Check the next sequence of tokens and reads them into a wxArrayString if they are
  264. * comments.
  265. *
  266. * Reading continues until a non-comment token is encountered, and such last read token
  267. * remains as #CurTok() and as #CurText(). No push back or "un get" mechanism is used
  268. * for this support. Upon return you simply avoid calling NextTok() for the next token,
  269. * but rather #CurTok().
  270. *
  271. * @return Heap allocated block of comments or NULL if none. The caller owns the
  272. * allocation and must delete if not NULL.
  273. */
  274. wxArrayString* ReadCommentLines();
  275. /**
  276. * Test a token to see if it is a symbol.
  277. *
  278. * This means it cannot be a special delimiter character such as #DSN_LEFT, #DSN_RIGHT,
  279. * #DSN_QUOTE, etc. It may however, coincidentally match a keyword and still be a symbol.
  280. */
  281. static bool IsSymbol( int aTok );
  282. /**
  283. * Throw an #IO_ERROR exception with an input file specific error message.
  284. *
  285. * @param aTok is the token/keyword type which was expected at the current input location.
  286. * @throw IO_ERROR with the location within the input file of the problem.
  287. */
  288. void Expecting( int aTok ) const;
  289. /**
  290. * Throw an #IO_ERROR exception with an input file specific error message.
  291. *
  292. * @param aTokenList is the token/keyword type which was expected at the
  293. * current input location, e.g. "pin|graphic|property".
  294. * @throw IO_ERROR with the location within the input file of the problem.
  295. */
  296. void Expecting( const char* aTokenList ) const;
  297. /**
  298. * Throw an #IO_ERROR exception with an input file specific error message.
  299. *
  300. * @param aTok is the token/keyword type which was not expected at the
  301. * current input location.
  302. * @throw IO_ERROR with the location within the input file of the problem.
  303. */
  304. void Unexpected( int aTok ) const;
  305. /**
  306. * Throw an #IO_ERROR exception with an input file specific error message.
  307. *
  308. * @param aToken is the token which was not expected at the current input location.
  309. * @throw IO_ERROR with the location within the input file of the problem.
  310. */
  311. void Unexpected( const char* aToken ) const;
  312. /**
  313. * Throw an #IO_ERROR exception with a message saying specifically that \a aTok
  314. * is a duplicate of one already seen in current context.
  315. *
  316. * @param aTok is the token/keyword type which was not expected at the current input
  317. * location.
  318. * @throw IO_ERROR with the location within the input file of the problem.
  319. */
  320. void Duplicate( int aTok );
  321. /**
  322. * Call #NextTok() and then verifies that the token read in is a #DSN_LEFT.
  323. *
  324. * @throw IO_ERROR if the next token is not a #DSN_LEFT
  325. */
  326. void NeedLEFT();
  327. /**
  328. * Call #NextTok() and then verifies that the token read in is a #DSN_RIGHT.
  329. *
  330. * @throw IO_ERROR if the next token is not a #DSN_RIGHT
  331. */
  332. void NeedRIGHT();
  333. /**
  334. * Return the C string representation of a #DSN_T value.
  335. */
  336. const char* GetTokenText( int aTok ) const;
  337. /**
  338. * Return a quote wrapped wxString representation of a token value.
  339. */
  340. wxString GetTokenString( int aTok ) const;
  341. static const char* Syntax( int aTok );
  342. /**
  343. * Return a pointer to the current token's text.
  344. */
  345. const char* CurText() const
  346. {
  347. return curText.c_str();
  348. }
  349. /**
  350. * Return a reference to current token in std::string form.
  351. */
  352. const std::string& CurStr() const
  353. {
  354. return curText;
  355. }
  356. /**
  357. * Return the current token text as a wxString, assuming that the input byte stream
  358. * is UTF8 encoded.
  359. */
  360. wxString FromUTF8() const
  361. {
  362. return wxString::FromUTF8( curText.c_str() );
  363. }
  364. /**
  365. * Return the current line number within my #LINE_READER.
  366. */
  367. int CurLineNumber() const
  368. {
  369. return reader->LineNumber();
  370. }
  371. /**
  372. * Return the current line of text from which the #CurText() would return its token.
  373. */
  374. const char* CurLine() const
  375. {
  376. return (const char*)(*reader);
  377. }
  378. /**
  379. * Return the current #LINE_READER source.
  380. *
  381. * @return source of the lines of text, e.g. a filename or "clipboard".
  382. */
  383. const wxString& CurSource() const
  384. {
  385. return reader->GetSource();
  386. }
  387. /**
  388. * Return the byte offset within the current line, using a 1 based index.
  389. *
  390. * @return a one based index into the current line.
  391. */
  392. int CurOffset() const
  393. {
  394. return curOffset + 1;
  395. }
  396. #ifndef SWIG
  397. protected:
  398. void init();
  399. int readLine()
  400. {
  401. if( reader )
  402. {
  403. reader->ReadLine();
  404. unsigned len = reader->Length();
  405. // start may have changed in ReadLine(), which can resize and
  406. // relocate reader's line buffer.
  407. start = reader->Line();
  408. next = start;
  409. limit = next + len;
  410. return len;
  411. }
  412. return 0;
  413. }
  414. /**
  415. * Take @a aToken string and looks up the string in the keywords table.
  416. *
  417. * @param aToken is a string to lookup in the keywords table.
  418. * @return with a value from the enum #DSN_T matching the keyword text,
  419. * or #DSN_SYMBOL if @a aToken is not in the keywords table.
  420. */
  421. int findToken( const std::string& aToken ) const;
  422. bool isStringTerminator( char cc ) const
  423. {
  424. if( !space_in_quoted_tokens && cc == ' ' )
  425. return true;
  426. if( cc == stringDelimiter )
  427. return true;
  428. return false;
  429. }
  430. bool iOwnReaders; ///< on readerStack, should I delete them?
  431. const char* start;
  432. const char* next;
  433. const char* limit;
  434. char dummy[1]; ///< when there is no reader.
  435. typedef std::vector<LINE_READER*> READER_STACK;
  436. READER_STACK readerStack; ///< all the LINE_READERs by pointer.
  437. ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders
  438. LINE_READER* reader;
  439. bool specctraMode; ///< if true, then:
  440. ///< 1) stringDelimiter can be changed
  441. ///< 2) Kicad quoting protocol is not in effect
  442. ///< 3) space_in_quoted_tokens is functional
  443. ///< else not.
  444. char stringDelimiter;
  445. bool space_in_quoted_tokens; ///< blank spaces within quoted strings
  446. bool commentsAreTokens; ///< true if should return comments as tokens
  447. int prevTok; ///< curTok from previous NextTok() call.
  448. int curOffset; ///< offset within current line of the current token
  449. int curTok; ///< the current token obtained on last NextTok()
  450. std::string curText; ///< the text of the current token
  451. const KEYWORD* keywords; ///< table sorted by CMake for bsearch()
  452. unsigned keywordCount; ///< count of keywords table
  453. KEYWORD_MAP keyword_hash; ///< fast, specialized "C string" hashtable
  454. #endif // SWIG
  455. };
  456. #endif // DSNLEXER_H_