kicad/include/dsnlexer.h


								/*

								 * This program source code file is part of KICAD, a free EDA CAD application.

								 *

								 * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>

								 * Copyright (C) 2007-2015 Kicad Developers, see change_log.txt for contributors.

								 *

								 * This program is free software; you can redistribute it and/or

								 * modify it under the terms of the GNU General Public License

								 * as published by the Free Software Foundation; either version 2

								 * of the License, or (at your option) any later version.

								 *

								 * This program is distributed in the hope that it will be useful,

								 * but WITHOUT ANY WARRANTY; without even the implied warranty of

								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								 * GNU General Public License for more details.

								 *

								 * You should have received a copy of the GNU General Public License

								 * along with this program; if not, you may find one here:

								 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html

								 * or you may search the http://www.gnu.org website for the version 2 license,

								 * or you may write to the Free Software Foundation, Inc.,

								 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

								 */


								#ifndef DSNLEXER_H_

								#define DSNLEXER_H_


								#include <stdio.h>

								#include <string>

								#include <vector>

								#include <hashtables.h>


								#include <richio.h>


								#ifndef SWIG

								/**
								 * Struct KEYWORD
								 * pairs one keyword string with the unique integer token that stands for it.
								 * The member order (name, then token) is what a brace initializer such as
								 * { "text", 0 } relies on.
								 */
								struct KEYWORD
								{
								    /// unique keyword.
								    const char* name;

								    /// a zero based index into an array of KEYWORDs
								    int token;
								};

								#endif


								// something like this macro can be used to help initialize a KEYWORD table.

								// see SPECCTRA_DB::keywords[] as an example.


								//#define TOKDEF(x)    { #x, T_##x }


								/**
								 * Enum DSN_SYNTAX_T
								 * enumerates the tokens that the DSN lexer itself supports.  Whether a
								 * given parser also wants to support each of them is up to that parser.
								 *
								 * Every value is negative.
								 * NOTE(review): presumably this keeps them apart from the zero based
								 * KEYWORD tokens -- confirm against the lexer implementation.
								 */
								enum DSN_SYNTAX_T
								{
								    DSN_NONE         = -11,
								    DSN_COMMENT      = -10,
								    DSN_STRING_QUOTE = -9,
								    DSN_QUOTE_DEF    = -8,
								    DSN_DASH         = -7,
								    DSN_SYMBOL       = -6,
								    DSN_NUMBER       = -5,
								    DSN_RIGHT        = -4,      ///< right bracket, ')'
								    DSN_LEFT         = -3,      ///< left bracket, '('
								    DSN_STRING       = -2,      ///< a quoted string, stripped of the quotes
								    DSN_EOF          = -1       ///< special case for end of file
								};


								/**
								 * Class DSNLEXER
								 * implements a lexical analyzer for the SPECCTRA DSN file format.  It
								 * reads lexical tokens from the current LINE_READER through the NextTok()
								 * function.
								 *
								 * LINE_READERs are held by pointer on a stack so that nested file inclusion
								 * can be handled, see PushReader() and PopReader().
								 */
								class DSNLEXER
								{
								#ifndef SWIG
								protected:
								    bool                iOwnReaders;            ///< on readerStack, should I delete them?
								    const char*         start;                  ///< first character of the current line, see readLine()
								    const char*         next;                   ///< reset to start by readLine(); presumably the scan position
								    const char*         limit;                  ///< start plus the length of the current line, see readLine()
								    char                dummy[1];               ///< when there is no reader.

								    typedef std::vector<LINE_READER*>  READER_STACK;

								    READER_STACK        readerStack;            ///< all the LINE_READERs by pointer.
								    LINE_READER*        reader;                 ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders

								    bool                specctraMode;           ///< if true, then:
								                                                ///< 1) stringDelimiter can be changed
								                                                ///< 2) Kicad quoting protocol is not in effect
								                                                ///< 3) space_in_quoted_tokens is functional
								                                                ///< else not.

								    char                stringDelimiter;        ///< see SetStringDelimiter() and isStringTerminator()
								    bool                space_in_quoted_tokens; ///< blank spaces within quoted strings

								    bool                commentsAreTokens;      ///< true if should return comments as tokens

								    int                 prevTok;                ///< curTok from previous NextTok() call.
								    int                 curOffset;              ///< offset within current line of the current token

								    int                 curTok;                 ///< the current token obtained on last NextTok()
								    std::string         curText;                ///< the text of the current token

								    const KEYWORD*      keywords;               ///< table sorted by CMake for bsearch()
								    unsigned            keywordCount;           ///< count of keywords table
								    KEYWORD_MAP         keyword_hash;           ///< fast, specialized "C string" hashtable

								    void init();

								    /**
								     * Function readLine
								     * asks the current reader, if there is one, to read its next line and
								     * then points start, next and limit at that line.
								     *
								     * @return int - the length the reader reports for the line just read,
								     *         or 0 if there is no current reader.
								     */
								    int readLine()
								    {
								        if( !reader )
								            return 0;

								        reader->ReadLine();

								        const unsigned len = reader->Length();

								        // start may have changed in ReadLine(), which can resize and
								        // relocate reader's line buffer, so fetch it only now.
								        start = reader->Line();
								        next  = start;
								        limit = start + len;

								        return static_cast<int>( len );
								    }

								    /**
								     * Function findToken
								     * takes aToken string and looks up the string in the keywords table.
								     *
								     * @param aToken is a string to lookup in the keywords table.
								     * @return int - the token value of the keyword matching the text,
								     *         or DSN_SYMBOL if @a aToken is not in the keywords table.
								     */
								    int findToken( const std::string& aToken );

								    /**
								     * Function isStringTerminator
								     * tells whether @a cc ends a quoted string under the current settings.
								     *
								     * @param cc is the character to test.
								     * @return bool - true if @a cc equals stringDelimiter, or if @a cc is a
								     *         space while space_in_quoted_tokens is false; false otherwise.
								     */
								    bool isStringTerminator( char cc ) const
								    {
								        if( !space_in_quoted_tokens && cc == ' ' )
								            return true;

								        return cc == stringDelimiter;
								    }

								#endif

								public:

								    /**
								     * Constructor ( const KEYWORD*, unsigned, FILE*, const wxString& )
								     * initializes a DSN lexer and prepares to read from aFile which
								     * is already open and has aFileName.
								     *
								     * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount.  This
								     *  token table need not contain the lexer separators such as '(' ')', etc.
								     * @param aKeywordCount is the count of tokens in aKeywordTable.
								     * @param aFile is an open file, which will be closed when this is destructed.
								     * @param aFileName is the name of the file
								     */
								    DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
								              FILE* aFile, const wxString& aFileName );

								    /**
								     * Constructor ( const KEYWORD*, unsigned, const std::string&, const wxString& )
								     * initializes a DSN lexer and prepares to read from @a aSExpression.
								     *
								     * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount.  This
								     *  token table need not contain the lexer separators such as '(' ')', etc.
								     * @param aKeywordCount is the count of tokens in aKeywordTable.
								     * @param aSExpression is text to feed through a STRING_LINE_READER
								     * @param aSource is a description of aSExpression, used for error reporting.
								     */
								    DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
								              const std::string& aSExpression, const wxString& aSource = wxEmptyString );

								    /**
								     * Constructor ( const std::string&, const wxString& )
								     * initializes a DSN lexer and prepares to read from @a aSExpression.  Use this
								     * one without a keyword table with the DOM parser in ptree.h.
								     *
								     * @param aSExpression is text to feed through a STRING_LINE_READER
								     * @param aSource is a description of aSExpression, used for error reporting.
								     */
								    DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString );

								    /**
								     * Constructor ( const KEYWORD*, unsigned, LINE_READER* )
								     * initializes a DSN lexer and prepares to read from @a aLineReader which
								     * is already open, and may be in use by other DSNLEXERs also.  No ownership
								     * is taken of @a aLineReader. This enables it to be used by other DSNLEXERs also.
								     *
								     * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount.  This
								     *  token table need not contain the lexer separators such as '(' ')', etc.
								     *
								     * @param aKeywordCount is the count of tokens in aKeywordTable.
								     *
								     * @param aLineReader is any subclassed instance of LINE_READER, such as
								     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken.
								     *  It defaults to NULL.
								     */
								    DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
								              LINE_READER* aLineReader = NULL );

								    virtual ~DSNLEXER();

								    /**
								     * Function SyncLineReaderWith
								     * is usable only for DSNLEXERs which share the same LINE_READER.
								     * It synchronizes the pointers handling the data read by the LINE_READER,
								     * which allows 2 DSNLEXERs to share the same current line when switching
								     * from one DSNLEXER to another DSNLEXER.
								     *
								     * @param aLexer = the model
								     * @return true if the sync can be made ( at least the same line reader )
								     */
								    bool SyncLineReaderWith( DSNLEXER& aLexer );

								    /**
								     * Function SetSpecctraMode
								     * changes the behavior of this lexer into or out of "specctra mode".  If
								     * specctra mode, then:
								     * 1) stringDelimiter can be changed
								     * 2) Kicad quoting protocol is not in effect
								     * 3) space_in_quoted_tokens is functional
								     * else none of the above are true.  The default mode is non-specctra mode, meaning:
								     * 1) stringDelimiter cannot be changed
								     * 2) Kicad quoting protocol is in effect
								     * 3) space_in_quoted_tokens is not functional
								     *
								     * @param aMode is true to enter specctra mode, false to leave it.
								     */
								    void SetSpecctraMode( bool aMode );

								    /**
								     * Function PushReader
								     * manages a stack of LINE_READERs in order to handle nested file inclusion.
								     * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes
								     * it the current LINE_READER with its own GetSource(), line number and line text.
								     * A grammar must be designed such that the "include" token (whatever its various names),
								     * and any of its parameters are not followed by anything on that same line,
								     * because PopReader always starts reading from a new line upon returning to
								     * the original LINE_READER.
								     *
								     * @param aLineReader is the LINE_READER to make current.
								     */
								    void PushReader( LINE_READER* aLineReader );

								    /**
								     * Function PopReader
								     * deletes the top most LINE_READER from an internal stack of LINE_READERs and
								     * in the case of FILE_LINE_READER this means the associated FILE is closed.
								     * The most recently used former LINE_READER on the stack becomes the
								     * current LINE_READER and its previous position in its input stream and
								     * its latest line number should pertain.  PopReader always starts reading
								     * from a new line upon returning to the previous LINE_READER.  A pop is only
								     * possible if there are at least 2 LINE_READERs on the stack, since popping
								     * the last one is not supported.
								     *
								     * @return LINE_READER* - is the one that was in use before the pop, or NULL
								     *   if there was not at least two readers on the stack and therefore the
								     *   pop failed.
								     */
								    LINE_READER* PopReader();

								    // Some functions whose return value is best overloaded to return an enum
								    // in a derived class.
								    //-----<overload return values to tokens>------------------------------

								    /**
								     * Function NextTok
								     * returns the next token found in the input file or DSN_EOF when reaching
								     * the end of file.  Users should wrap this function to return an enum
								     * to aid in grammar debugging while running under a debugger, but leave
								     * this lower level function returning an int (so the enum does not collide
								     * with another usage).
								     * @return int - the type of token found next.
								     * @throw IO_ERROR - only if the LINE_READER throws it.
								     */
								    int NextTok();

								    /**
								     * Function NeedSYMBOL
								     * calls NextTok() and then verifies that the token read in
								     * satisfies bool IsSymbol().
								     * If not, an IO_ERROR is thrown.
								     * @return int - the actual token read in.
								     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
								     */
								    int NeedSYMBOL();

								    /**
								     * Function NeedSYMBOLorNUMBER
								     * calls NextTok() and then verifies that the token read in
								     * satisfies bool IsSymbol() or tok==DSN_NUMBER.
								     * If not, an IO_ERROR is thrown.
								     * @return int - the actual token read in.
								     * @throw IO_ERROR, if the next token does not satisfy the above test
								     */
								    int NeedSYMBOLorNUMBER();

								    /**
								     * Function NeedNUMBER
								     * calls NextTok() and then verifies that the token read is type DSN_NUMBER.
								     * If not, an IO_ERROR is thrown using text from aExpectation.
								     * @param aExpectation is text used in the error message on failure.
								     * @return int - the actual token read in.
								     * @throw IO_ERROR, if the next token does not satisfy the above test
								     */
								    int NeedNUMBER( const char* aExpectation );

								    /**
								     * Function CurTok
								     * returns whatever NextTok() returned the last time it was called.
								     */
								    int CurTok() const
								    {
								        return curTok;
								    }

								    /**
								     * Function PrevTok
								     * returns whatever NextTok() returned the 2nd to last time it was called.
								     */
								    int PrevTok() const
								    {
								        return prevTok;
								    }

								    //-----</overload return values to tokens>-----------------------------

								    /**
								     * Function SetStringDelimiter
								     * changes the string delimiter to some other character and returns the
								     * old value.  The change is only made while in specctra mode; otherwise
								     * the delimiter is left alone and only the current value is returned.
								     * @param aStringDelimiter The new delimiter character.
								     * @return char - The delimiter that was in effect before the call.
								     */
								    char SetStringDelimiter( char aStringDelimiter )
								    {
								        // Held as a char, the same type as the member and the return value,
								        // so that no int <-> char conversion takes place.
								        const char old = stringDelimiter;

								        if( specctraMode )
								            stringDelimiter = aStringDelimiter;

								        return old;
								    }

								    /**
								     * Function SetSpaceInQuotedTokens
								     * changes the setting controlling whether a space in a quoted string is
								     * a terminator.  The change is only made while in specctra mode.
								     * @param val If true, means a space does not terminate a quoted string;
								     *            if false, a space is a terminator (see isStringTerminator()).
								     * @return bool - the setting that was in effect before the call.
								     */
								    bool SetSpaceInQuotedTokens( bool val )
								    {
								        const bool old = space_in_quoted_tokens;

								        if( specctraMode )
								            space_in_quoted_tokens = val;

								        return old;
								    }

								    /**
								     * Function SetCommentsAreTokens
								     * changes the handling of comments.  If set true, comments are returned
								     * as single line strings with a terminating newline, else they are
								     * consumed by the lexer and not returned.
								     * @param val is the new setting.
								     * @return bool - the setting that was in effect before the call.
								     */
								    bool SetCommentsAreTokens( bool val )
								    {
								        const bool old = commentsAreTokens;

								        commentsAreTokens = val;

								        return old;
								    }

								    /**
								     * Function ReadCommentLines
								     * checks the next sequence of tokens and reads them into a wxArrayString
								     * if they are comments.  Reading continues until a non-comment token is
								     * encountered, and such last read token remains as CurTok() and as CurText().
								     * No push back or "un get" mechanism is used for this support.  Upon return
								     * you simply avoid calling NextTok() for the next token, but rather CurTok().
								     *
								     * @return wxArrayString* - heap allocated block of comments, or NULL if none;
								     *   caller owns the allocation and must delete if not NULL.
								     */
								    wxArrayString* ReadCommentLines();

								    /**
								     * Function IsSymbol
								     * tests a token to see if it is a symbol.  This means it cannot be a
								     * special delimiter character such as DSN_LEFT or DSN_RIGHT.  It may
								     * however, coincidentally match a keyword and still be a symbol.
								     * @param aTok is the token to test.
								     */
								    static bool IsSymbol( int aTok );

								    /**
								     * Function Expecting
								     * throws an IO_ERROR exception with an input file specific error message.
								     * @param aTok is the token/keyword type which was expected at the current input location.
								     * @throw IO_ERROR with the location within the input file of the problem.
								     */
								    void Expecting( int aTok );

								    /**
								     * Function Expecting
								     * throws an IO_ERROR exception with an input file specific error message.
								     * @param aTokenList is the token/keyword type which was expected at the
								     *         current input location, e.g.  "pin|graphic|property"
								     * @throw IO_ERROR with the location within the input file of the problem.
								     */
								    void Expecting( const char* aTokenList );

								    /**
								     * Function Unexpected
								     * throws an IO_ERROR exception with an input file specific error message.
								     * @param aTok is the token/keyword type which was not expected at the
								     *         current input location.
								     * @throw IO_ERROR with the location within the input file of the problem.
								     */
								    void Unexpected( int aTok );

								    /**
								     * Function Unexpected
								     * throws an IO_ERROR exception with an input file specific error message.
								     * @param aToken is the token which was not expected at the
								     *         current input location.
								     * @throw IO_ERROR with the location within the input file of the problem.
								     */
								    void Unexpected( const char* aToken );

								    /**
								     * Function Duplicate
								     * throws an IO_ERROR exception with a message saying specifically that aTok
								     * is a duplicate of one already seen in current context.
								     * @param aTok is the token/keyword type which was not expected at the
								     *         current input location.
								     * @throw IO_ERROR with the location within the input file of the problem.
								     */
								    void Duplicate( int aTok );

								    /**
								     * Function NeedLEFT
								     * calls NextTok() and then verifies that the token read in is a DSN_LEFT.
								     * If it is not, an IO_ERROR is thrown.
								     * @throw IO_ERROR, if the next token is not a DSN_LEFT
								     */
								    void NeedLEFT();

								    /**
								     * Function NeedRIGHT
								     * calls NextTok() and then verifies that the token read in is a DSN_RIGHT.
								     * If it is not, an IO_ERROR is thrown.
								     * @throw IO_ERROR, if the next token is not a DSN_RIGHT
								     */
								    void NeedRIGHT();

								    /**
								     * Function GetTokenText
								     * returns the C string representation of a token value.
								     * @param aTok is the token value to describe.
								     */
								    const char* GetTokenText( int aTok );

								    /**
								     * Function GetTokenString
								     * returns a quote wrapped wxString representation of a token value.
								     * @param aTok is the token value to describe.
								     */
								    wxString GetTokenString( int aTok );

								    /**
								     * Function Syntax
								     * NOTE(review): not documented in this header.  Presumably returns a
								     * descriptive C string for one of the DSN_SYNTAX_T values -- confirm
								     * against the implementation.
								     * @param aTok is the token value to describe.
								     */
								    static const char* Syntax( int aTok );

								    /**
								     * Function CurText
								     * returns a pointer to the current token's text.  The pointer comes from
								     * curText.c_str(), so it refers to storage owned by this lexer.
								     */
								    const char* CurText() const
								    {
								        return curText.c_str();
								    }

								    /**
								     * Function CurStr
								     * returns a reference to current token in std::string form.
								     */
								    const std::string& CurStr() const
								    {
								        return curText;
								    }

								    /**
								     * Function FromUTF8
								     * returns the current token text as a wxString, assuming that the input
								     * byte stream is UTF8 encoded.
								     */
								    wxString FromUTF8() const
								    {
								        return wxString::FromUTF8( curText.c_str() );
								    }

								    /**
								     * Function CurLineNumber
								     * returns the current line number within my LINE_READER.
								     * The current reader is dereferenced without a NULL check, so there
								     * must be one when this is called.
								     */
								    int CurLineNumber() const
								    {
								        return reader->LineNumber();
								    }

								    /**
								     * Function CurLine
								     * returns the current line of text, from which the CurText() would return
								     * its token.
								     * The current reader is dereferenced without a NULL check, so there
								     * must be one when this is called.
								     */
								    const char* CurLine() const
								    {
								        return (const char*)(*reader);
								    }

								    /**
								     * Function CurSource
								     * returns the current LINE_READER source.
								     * The current reader is dereferenced without a NULL check, so there
								     * must be one when this is called.
								     * @return const wxString& - the source of the lines of text,
								     *   e.g. a filename or "clipboard".
								     */
								    const wxString& CurSource() const
								    {
								        return reader->GetSource();
								    }

								    /**
								     * Function CurOffset
								     * returns the byte offset within the current line, using a 1 based index.
								     * @return int - a one based index into the current line.
								     */
								    int CurOffset() const
								    {
								        // curOffset is kept zero based; callers get it one based.
								        return curOffset + 1;
								    }
								};


								#endif  // DSNLEXER_H_