You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

544 lines
19 KiB

15 years ago
15 years ago
15 years ago
  1. /*
  2. * This program source code file is part of KICAD, a free EDA CAD application.
  3. *
  4. * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
  5. * Copyright (C) 2007-2015 Kicad Developers, see change_log.txt for contributors.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, you may find one here:
  19. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  20. * or you may search the http://www.gnu.org website for the version 2 license,
  21. * or you may write to the Free Software Foundation, Inc.,
  22. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  23. */
  24. #ifndef DSNLEXER_H_
  25. #define DSNLEXER_H_
  26. #include <stdio.h>
  27. #include <string>
  28. #include <vector>
  29. #include <hashtables.h>
  30. #include <richio.h>
  31. #ifndef SWIG
  /**
   * Struct KEYWORD
   * pairs a keyword string with its unique integer token.
   *
   * This is a plain aggregate so that a table of keywords can be brace
   * initialized, e.g. { "name", 0 }.  The member order (name, then token) is
   * therefore part of the interface and must not be changed.
   */
  struct KEYWORD
  {
      const char* name;       ///< unique keyword.
      int         token;      ///< a zero based index into an array of KEYWORDs
  };
  41. #endif
  42. // something like this macro can be used to help initialize a KEYWORD table.
  43. // see SPECCTRA_DB::keywords[] as an example.
  44. //#define TOKDEF(x) { #x, T_##x }
  /**
   * Enum DSN_SYNTAX_T
   * lists all the DSN lexer's tokens that are supported in lexing.  It is up
   * to the parser if it wants also to support them.
   *
   * Every value is negative.  KEYWORD::token is documented as a zero based
   * index into the keyword table, so these syntax tokens cannot be confused
   * with a keyword token.
   */
  enum DSN_SYNTAX_T {
      DSN_NONE            = -11,
      DSN_COMMENT         = -10,
      DSN_STRING_QUOTE    = -9,
      DSN_QUOTE_DEF       = -8,
      DSN_DASH            = -7,
      DSN_SYMBOL          = -6,
      DSN_NUMBER          = -5,
      DSN_RIGHT           = -4,   // right parenthesis, ')'
      DSN_LEFT            = -3,   // left parenthesis, '('
      DSN_STRING          = -2,   // a quoted string, stripped of the quotes
      DSN_EOF             = -1    // special case for end of file
  };
  63. /**
  64. * Class DSNLEXER
  65. * implements a lexical analyzer for the SPECCTRA DSN file format. It
  66. * reads lexical tokens from the current LINE_READER through the NextTok()
  67. * function.
  68. */
  69. class DSNLEXER
  70. {
  71. #ifndef SWIG
  72. protected:
  73. bool iOwnReaders; ///< on readerStack, should I delete them?
  74. const char* start;
  75. const char* next;
  76. const char* limit;
  77. char dummy[1]; ///< when there is no reader.
  78. typedef std::vector<LINE_READER*> READER_STACK;
  79. READER_STACK readerStack; ///< all the LINE_READERs by pointer.
  80. LINE_READER* reader; ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders
  81. bool specctraMode; ///< if true, then:
  82. ///< 1) stringDelimiter can be changed
  83. ///< 2) Kicad quoting protocol is not in effect
  84. ///< 3) space_in_quoted_tokens is functional
  85. ///< else not.
  86. char stringDelimiter;
  87. bool space_in_quoted_tokens; ///< blank spaces within quoted strings
  88. bool commentsAreTokens; ///< true if should return comments as tokens
  89. int prevTok; ///< curTok from previous NextTok() call.
  90. int curOffset; ///< offset within current line of the current token
  91. int curTok; ///< the current token obtained on last NextTok()
  92. std::string curText; ///< the text of the current token
  93. const KEYWORD* keywords; ///< table sorted by CMake for bsearch()
  94. unsigned keywordCount; ///< count of keywords table
  95. KEYWORD_MAP keyword_hash; ///< fast, specialized "C string" hashtable
  96. void init();
  97. int readLine() throw( IO_ERROR )
  98. {
  99. if( reader )
  100. {
  101. reader->ReadLine();
  102. unsigned len = reader->Length();
  103. // start may have changed in ReadLine(), which can resize and
  104. // relocate reader's line buffer.
  105. start = reader->Line();
  106. next = start;
  107. limit = next + len;
  108. return len;
  109. }
  110. return 0;
  111. }
  112. /**
  113. * Function findToken
  114. * takes aToken string and looks up the string in the keywords table.
  115. *
  116. * @param aToken is a string to lookup in the keywords table.
  117. * @return int - with a value from the enum DSN_T matching the keyword text,
  118. * or DSN_SYMBOL if @a aToken is not in the kewords table.
  119. */
  120. int findToken( const std::string& aToken );
  121. bool isStringTerminator( char cc )
  122. {
  123. if( !space_in_quoted_tokens && cc==' ' )
  124. return true;
  125. if( cc == stringDelimiter )
  126. return true;
  127. return false;
  128. }
  129. #endif
  130. public:
  131. /**
  132. * Constructor ( FILE*, const wxString& )
  133. * intializes a DSN lexer and prepares to read from aFile which
  134. * is already open and has aFilename.
  135. *
  136. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  137. * token table need not contain the lexer separators such as '(' ')', etc.
  138. * @param aKeywordCount is the count of tokens in aKeywordTable.
  139. * @param aFile is an open file, which will be closed when this is destructed.
  140. * @param aFileName is the name of the file
  141. */
  142. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
  143. FILE* aFile, const wxString& aFileName );
  144. /**
  145. * Constructor ( const KEYWORD*, unsigned, const std::string&, const wxString& )
  146. * intializes a DSN lexer and prepares to read from @a aSExpression.
  147. *
  148. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  149. * token table need not contain the lexer separators such as '(' ')', etc.
  150. * @param aKeywordCount is the count of tokens in aKeywordTable.
  151. * @param aSExpression is text to feed through a STRING_LINE_READER
  152. * @param aSource is a description of aSExpression, used for error reporting.
  153. */
  154. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
  155. const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  156. /**
  157. * Constructor ( const std::string&, const wxString& )
  158. * intializes a DSN lexer and prepares to read from @a aSExpression. Use this
  159. * one without a keyword table with the DOM parser in ptree.h.
  160. *
  161. * @param aSExpression is text to feed through a STRING_LINE_READER
  162. * @param aSource is a description of aSExpression, used for error reporting.
  163. */
  164. DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );
  165. /**
  166. * Constructor ( LINE_READER* )
  167. * intializes a DSN lexer and prepares to read from @a aLineReader which
  168. * is already open, and may be in use by other DSNLEXERs also. No ownership
  169. * is taken of @a aLineReader. This enables it to be used by other DSNLEXERs also.
  170. *
  171. * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This
  172. * token table need not contain the lexer separators such as '(' ')', etc.
  173. *
  174. * @param aKeywordCount is the count of tokens in aKeywordTable.
  175. *
  176. * @param aLineReader is any subclassed instance of LINE_READER, such as
  177. * STRING_LINE_READER or FILE_LINE_READER. No ownership is taken.
  178. */
  179. DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
  180. LINE_READER* aLineReader = NULL );
  181. virtual ~DSNLEXER();
  182. /**
  183. * Useable only for DSN lexers which share the same LINE_READER
  184. * Synchronizes the pointers handling the data read by the LINE_READER
  185. * Allows 2 DNSLEXER to share the same current line, when switching from a
  186. * DNSLEXER to an other DNSLEXER
  187. * @param aLexer = the model
  188. * @return true if the sync can be made ( at least the same line reader )
  189. */
  190. bool SyncLineReaderWith( DSNLEXER& aLexer );
  191. /**
  192. * Function SetSpecctraMode
  193. * changes the behavior of this lexer into or out of "specctra mode". If
  194. * specctra mode, then:
  195. * 1) stringDelimiter can be changed
  196. * 2) Kicad quoting protocol is not in effect
  197. * 3) space_in_quoted_tokens is functional
  198. * else none of the above are true. The default mode is non-specctra mode, meaning:
  199. * 1) stringDelimiter cannot be changed
  200. * 2) Kicad quoting protocol is in effect
  201. * 3) space_in_quoted_tokens is not functional
  202. */
  203. void SetSpecctraMode( bool aMode );
  204. /**
  205. * Function PushReader
  206. * manages a stack of LINE_READERs in order to handle nested file inclusion.
  207. * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
  208. * it the current LINE_READER with its own GetSource(), line number and line text.
  209. * A grammar must be designed such that the "include" token (whatever its various names),
  210. * and any of its parameters are not followed by anything on that same line,
  211. * because PopReader always starts reading from a new line upon returning to
  212. * the original LINE_READER.
  213. */
  214. void PushReader( LINE_READER* aLineReader );
  215. /**
  216. * Function PopReader
  217. * deletes the top most LINE_READER from an internal stack of LINE_READERs and
  218. * in the case of FILE_LINE_READER this means the associated FILE is closed.
  219. * The most recently used former LINE_READER on the stack becomes the
  220. * current LINE_READER and its previous position in its input stream and the
  221. * its latest line number should pertain. PopReader always starts reading
  222. * from a new line upon returning to the previous LINE_READER. A pop is only
  223. * possible if there are at least 2 LINE_READERs on the stack, since popping
  224. * the last one is not supported.
  225. *
  226. * @return LINE_READER* - is the one that was in use before the pop, or NULL
  227. * if there was not at least two readers on the stack and therefore the
  228. * pop failed.
  229. */
  230. LINE_READER* PopReader();
  231. // Some functions whose return value is best overloaded to return an enum
  232. // in a derived class.
  233. //-----<overload return values to tokens>------------------------------
  234. /**
  235. * Function NextTok
  236. * returns the next token found in the input file or DSN_EOF when reaching
  237. * the end of file. Users should wrap this function to return an enum
  238. * to aid in grammar debugging while running under a debugger, but leave
  239. * this lower level function returning an int (so the enum does not collide
  240. * with another usage).
  241. * @return int - the type of token found next.
  242. * @throw IO_ERROR - only if the LINE_READER throws it.
  243. */
  244. int NextTok() throw( IO_ERROR );
  245. /**
  246. * Function NeedSYMBOL
  247. * calls NextTok() and then verifies that the token read in
  248. * satisfies bool IsSymbol().
  249. * If not, an IO_ERROR is thrown.
  250. * @return int - the actual token read in.
  251. * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
  252. */
  253. int NeedSYMBOL() throw( IO_ERROR );
  254. /**
  255. * Function NeedSYMBOLorNUMBER
  256. * calls NextTok() and then verifies that the token read in
  257. * satisfies bool IsSymbol() or tok==DSN_NUMBER.
  258. * If not, an IO_ERROR is thrown.
  259. * @return int - the actual token read in.
  260. * @throw IO_ERROR, if the next token does not satisfy the above test
  261. */
  262. int NeedSYMBOLorNUMBER() throw( IO_ERROR );
  263. /**
  264. * Function NeedNUMBER
  265. * calls NextTok() and then verifies that the token read is type DSN_NUMBER.
  266. * If not, and IO_ERROR is thrown using text from aExpectation.
  267. * @return int - the actual token read in.
  268. * @throw IO_ERROR, if the next token does not satisfy the above test
  269. */
  270. int NeedNUMBER( const char* aExpectation ) throw( IO_ERROR );
  271. /**
  272. * Function CurTok
  273. * returns whatever NextTok() returned the last time it was called.
  274. */
  275. int CurTok()
  276. {
  277. return curTok;
  278. }
  279. /**
  280. * Function PrevTok
  281. * returns whatever NextTok() returned the 2nd to last time it was called.
  282. */
  283. int PrevTok()
  284. {
  285. return prevTok;
  286. }
  287. //-----</overload return values to tokens>-----------------------------
  288. /**
  289. * Function SetStringDelimiter
  290. * changes the string delimiter from the default " to some other character
  291. * and returns the old value.
  292. * @param aStringDelimiter The character in lowest 8 bits.
  293. * @return int - The old delimiter in the lowest 8 bits.
  294. */
  295. char SetStringDelimiter( char aStringDelimiter )
  296. {
  297. int old = stringDelimiter;
  298. if( specctraMode )
  299. stringDelimiter = aStringDelimiter;
  300. return old;
  301. }
  302. /**
  303. * Function SetSpaceInQuotedTokens
  304. * changes the setting controlling whether a space in a quoted string is
  305. * a terminator.
  306. * @param val If true, means
  307. */
  308. bool SetSpaceInQuotedTokens( bool val )
  309. {
  310. bool old = space_in_quoted_tokens;
  311. if( specctraMode )
  312. space_in_quoted_tokens = val;
  313. return old;
  314. }
  315. /**
  316. * Function SetCommentsAreTokens
  317. * changes the handling of comments. If set true, comments are returns
  318. * as single line strings with a terminating newline, else they are
  319. * consumed by the lexer and not returned.
  320. */
  321. bool SetCommentsAreTokens( bool val )
  322. {
  323. bool old = commentsAreTokens;
  324. commentsAreTokens = val;
  325. return old;
  326. }
  327. /**
  328. * Function ReadCommentLines
  329. * checks the next sequence of tokens and reads them into a wxArrayString
  330. * if they are comments. Reading continues until a non-comment token is
  331. * encountered, and such last read token remains as CurTok() and as CurText().
  332. * No push back or "un get" mechanism is used for this support. Upon return
  333. * you simply avoid calling NextTok() for the next token, but rather CurTok().
  334. *
  335. * @return wxArrayString* - heap allocated block of comments, or NULL if none;
  336. * caller owns the allocation and must delete if not NULL.
  337. */
  338. wxArrayString* ReadCommentLines() throw( IO_ERROR );
  339. /**
  340. * Function IsSymbol
  341. * tests a token to see if it is a symbol. This means it cannot be a
  342. * special delimiter character such as DSN_LEFT, DSN_RIGHT, DSN_QUOTE, etc. It may
  343. * however, coincidentally match a keyword and still be a symbol.
  344. */
  345. static bool IsSymbol( int aTok );
  346. /**
  347. * Function Expecting
  348. * throws an IO_ERROR exception with an input file specific error message.
  349. * @param aTok is the token/keyword type which was expected at the current input location.
  350. * @throw IO_ERROR with the location within the input file of the problem.
  351. */
  352. void Expecting( int aTok ) throw( IO_ERROR );
  353. /**
  354. * Function Expecting
  355. * throws an IO_ERROR exception with an input file specific error message.
  356. * @param aTokenList is the token/keyword type which was expected at the
  357. * current input location, e.g. "pin|graphic|property"
  358. * @throw IO_ERROR with the location within the input file of the problem.
  359. */
  360. void Expecting( const char* aTokenList ) throw( IO_ERROR );
  361. /**
  362. * Function Unexpected
  363. * throws an IO_ERROR exception with an input file specific error message.
  364. * @param aTok is the token/keyword type which was not expected at the
  365. * current input location.
  366. * @throw IO_ERROR with the location within the input file of the problem.
  367. */
  368. void Unexpected( int aTok ) throw( IO_ERROR );
  369. /**
  370. * Function Unexpected
  371. * throws an IO_ERROR exception with an input file specific error message.
  372. * @param aToken is the token which was not expected at the
  373. * current input location.
  374. * @throw IO_ERROR with the location within the input file of the problem.
  375. */
  376. void Unexpected( const char* aToken ) throw( IO_ERROR );
  377. /**
  378. * Function Duplicate
  379. * throws an IO_ERROR exception with a message saying specifically that aTok
  380. * is a duplicate of one already seen in current context.
  381. * @param aTok is the token/keyword type which was not expected at the
  382. * current input location.
  383. * @throw IO_ERROR with the location within the input file of the problem.
  384. */
  385. void Duplicate( int aTok ) throw( IO_ERROR );
  386. /**
  387. * Function NeedLEFT
  388. * calls NextTok() and then verifies that the token read in is a DSN_LEFT.
  389. * If it is not, an IO_ERROR is thrown.
  390. * @throw IO_ERROR, if the next token is not a DSN_LEFT
  391. */
  392. void NeedLEFT() throw( IO_ERROR );
  393. /**
  394. * Function NeedRIGHT
  395. * calls NextTok() and then verifies that the token read in is a DSN_RIGHT.
  396. * If it is not, an IO_ERROR is thrown.
  397. * @throw IO_ERROR, if the next token is not a DSN_RIGHT
  398. */
  399. void NeedRIGHT() throw( IO_ERROR );
  400. /**
  401. * Function GetTokenText
  402. * returns the C string representation of a DSN_T value.
  403. */
  404. const char* GetTokenText( int aTok );
  405. /**
  406. * Function GetTokenString
  407. * returns a quote wrapped wxString representation of a token value.
  408. */
  409. wxString GetTokenString( int aTok );
  410. static const char* Syntax( int aTok );
  411. /**
  412. * Function CurText
  413. * returns a pointer to the current token's text.
  414. */
  415. const char* CurText()
  416. {
  417. return curText.c_str();
  418. }
  419. /**
  420. * Function CurStr
  421. * returns a reference to current token in std::string form.
  422. */
  423. const std::string& CurStr()
  424. {
  425. return curText;
  426. }
  427. /**
  428. * Function FromUTF8
  429. * returns the current token text as a wxString, assuming that the input
  430. * byte stream is UTF8 encoded.
  431. */
  432. wxString FromUTF8()
  433. {
  434. return wxString::FromUTF8( curText.c_str() );
  435. }
  436. /**
  437. * Function CurLineNumber
  438. * returns the current line number within my LINE_READER
  439. */
  440. int CurLineNumber()
  441. {
  442. return reader->LineNumber();
  443. }
  444. /**
  445. * Function CurLine
  446. * returns the current line of text, from which the CurText() would return
  447. * its token.
  448. */
  449. const char* CurLine()
  450. {
  451. return (const char*)(*reader);
  452. }
  453. /**
  454. * Function CurFilename
  455. * returns the current LINE_READER source.
  456. * @return const wxString& - the source of the lines of text,
  457. * e.g. a filename or "clipboard".
  458. */
  459. const wxString& CurSource()
  460. {
  461. return reader->GetSource();
  462. }
  463. /**
  464. * Function CurOffset
  465. * returns the byte offset within the current line, using a 1 based index.
  466. * @return int - a one based index into the current line.
  467. */
  468. int CurOffset()
  469. {
  470. return curOffset + 1;
  471. }
  472. };
  473. #endif // DSNLEXER_H_