From 604edcac3abb33d258c5d1377b98ec33ce1244fb Mon Sep 17 00:00:00 2001 From: Dick Hollenbeck Date: Sun, 30 Jan 2011 13:26:03 -0600 Subject: [PATCH] ++all: * DSNLEXER::NextTok() now uses two separate modes to parse quoted strings. This gives us the freedom to control our own destiny separate from the constraints put on us by the Specctra DSN spec. * Added Documentation/s-expressions.txt to explain all this. * Enhanced our quoting protocol by moving away from doubling up double quotes to a C line escape mechanism. * Now support multi-line strings, which when properly escaped, can still be read in as a token originating on a single line. --- CHANGELOG.txt | 12 +++ Documentation/s-expressions.txt | 89 +++++++++++++++++ common/dsnlexer.cpp | 167 +++++++++++++++++++++----------- common/richio.cpp | 69 +++++++------ include/dsnlexer.h | 32 +++++- pcbnew/ioascii.cpp | 2 +- pcbnew/specctra.cpp | 2 +- pcbnew/specctra.h | 2 + 8 files changed, 284 insertions(+), 91 deletions(-) create mode 100644 Documentation/s-expressions.txt diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 65c5f9c8d5..3942c8651b 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -4,6 +4,18 @@ KiCad ChangeLog 2010 Please add newer entries at the top, list the date and your name with email address. +2011-Jan-30 UPDATE Dick Hollenbeck +================================================================================ +++all: + * DSNLEXER::NextTok() now uses two separate modes to parse quoted strings. + This gives us the freedom to control our own destiny separate from the + constraints put on us by the Specctra DSN spec. + * Added Documentation/s-expressions.txt to explain all this. + * Enhanced our quoting protocol by moving away from doubling up double quotes + to a C line escape mechanism. + * Now support multi-line strings, which when properly escaped, can still be + read in as a token originating on a single line. 
+ 2011-Jan-21 UPDATE Wayne Stambaugh ================================================================================ EESchema code refactoring and coding policy naming fixes. diff --git a/Documentation/s-expressions.txt b/Documentation/s-expressions.txt new file mode 100644 index 0000000000..5c39b66c20 --- /dev/null +++ b/Documentation/s-expressions.txt @@ -0,0 +1,89 @@ + +S-Expression Support in Kicad +============================================================================ +Author: Dick Hollenbeck +Date: Jan 2011 + + +An s-expression is a text stream or string, in the same vein as XML, consisting +of a sequence of elements. Each element is either an atom or list. An atom +corresponds to a string, while a list corresponds to an s-expression. The +following grammar represents our definition of an s-expression: + +sexpr ::= ( sx ) +sx ::= atom sxtail | sexpr sxtail | NULL +sxtail ::= sx | NULL +atom ::= quoted | value +quoted ::= "ws_string" +value ::= nws_string + +An atom can either be a quoted string, which is a string containing whitespace +surrounded by double quotes, or a non-whitespace string that does not require +surrounding quotes. + +The s-expression syntax used in Kicad uses two quoting/syntax strategies, given +by the needs of the Specctra DSN specification and of our own non-specctra +needs. The Specctra DSN specification is not very clear with regard to quoting +and on top of that there is Freerouter's interpretation, which would actually +supersede anything in the Specctra DSN spec anyway, due to a desire to be +compatible with Freerouter. + +We have our own needs, which go beyond those of the Specctra DSN spec, so we +support the two syntaxes or quoting protocols for quoted atoms: + +1) Specctra quoting protocol (specctraMode) +2) Kicad quoting protocol (non-specctraMode) + +We can control our own destiny better by having a separately defined mode for +non Specctra DSN files. 
+ +To summarize, in specctraMode Freerouter dictates to us what we need to do. In +non-specctraMode, which can be thought of as Kicad mode, we have our own quoting +protocol and can make changes without breaking the specctraMode. + +There needs to be agreement between how a file is saved, and how a file is read +back in, in either mode, to fulfill the round-tripping requirements. A file +written using one mode may not necessarily be readable using the other mode, +although it might be. Just don't count on it. + + +In Kicad mode: + +OUTPUTFORMATTER::Quoted() is the tool to wrap s-expression atoms. +DSNLEXER::NextTok() is basically the inverse function, and reads tokens back in. +These two must agree, so that what is written out comes back in un-altered. + +The decision to wrap the string or not is left to the Quoted() function. If the +string is wrapped, it will also escape internal double quotes, \n's and \r's. +Any null string is wrapped in quotes, and so is any string which starts with +'#', so that it is not confused with an s-expression comment. + + +Kicad S-expression Syntax and Quoting Protocol (non-specctraMode): +================================================================== + +*) All Kicad s-expression files are saved using a UTF-8 encoding and should +support any international characters in the atoms. Some atoms are considered +keywords, and constitute a grammar superimposed on the s-expressions. + +*) All keywords are ASCII and lowercase. International characters are not to be +used here. + +*) DSNLEXER::NextTok() requires that any token be on a single line of input. If +you want to save a multi-line string, Quoted() will automatically escape the \n +or \r for you and put the output on a single line. It should round-trip fine. + +*) There can be escape sequences in a quoted string only. Escape sequences allow +foreign tools to generate byte patterns in the input stream. C style 2 byte hex +codes are supported, and so are 3 byte octal escape sequences. 
See DSNLEXER::NextTok() +for the full list of escape sequences, by searching file dsnlexer.cpp for the +string "ESCAPE SEQUENCES". Any use of the escape mechanism must still produce +UTF-8 encoded text after the escape handling is applied. + +*) Just because an escape sequence is supported on input, does not mean that +OUTPUTFORMATTER::Quoted() must generate such an escape sequence for output. For +example, having true tabs in the s-expression file is OK. So that will not be +escaped on output. Other similar cases exist. + +*) Backslash is the escape byte. + diff --git a/common/dsnlexer.cpp b/common/dsnlexer.cpp index de4639c225..5f027196ab 100644 --- a/common/dsnlexer.cpp +++ b/common/dsnlexer.cpp @@ -54,7 +54,8 @@ void DSNLEXER::init() curTok = DSN_NONE; stringDelimiter = '"'; - space_in_quoted_tokens = true; + specctraMode = false; + space_in_quoted_tokens = false; commentsAreTokens = false; } @@ -107,6 +108,20 @@ DSNLEXER::~DSNLEXER() } } +void DSNLEXER::SetSpecctraMode( bool aMode ) +{ + specctraMode = aMode; + if( aMode ) + { + // specctra mode defaults, some of which can still be changed in this mode. + space_in_quoted_tokens = true; + } + else + { + space_in_quoted_tokens = false; + stringDelimiter = '"'; + } +} void DSNLEXER::PushReader( LINE_READER* aLineReader ) { @@ -479,74 +494,119 @@ L_read: // else it was something like +5V, fall through below } - // a quoted string + // a quoted string, will return DSN_STRING if( *cur == stringDelimiter ) { - // New code, understands nested quotes, and deliberately restricts - // strings to a single line. Still strips off leading and trailing - // quotes, and now removes internal doubled up quotes -#if 1 - head = cur; - - // copy the token, character by character so we can remove doubled up quotes. - curText.clear(); - - while( head < limit ) + // Non-specctraMode, understands and deciphers escaped \, \r, \n, and \". 
+ // Strips off leading and trailing double quotes + if( !specctraMode ) { - if( *head==stringDelimiter ) + // copy the token, character by character so we can remove doubled up quotes. + curText.clear(); + + ++cur; // skip over the leading delimiter, which is always " in non-specctraMode + + head = cur; + + while( head= limit ) + break; // throw exception at L_unterminated + + switch( *head++ ) + { + case '"': + case '\\': c = head[-1]; break; + case 'a': c = '\x07'; break; + case 'b': c = '\x08'; break; + case 'f': c = '\x0c'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\x09'; break; + case 'v': c = '\x0b'; break; + + case 'x': // 1 or 2 byte hex escape sequence + for( i=0; i<2; ++i ) + { + if( !isxdigit( head[i] ) ) + break; + tbuf[i] = head[i]; + } + tbuf[i] = '\0'; + if( i > 0 ) + c = (char) strtoul( tbuf, NULL, 16 ); + else + c = 'x'; // a goofed hex escape sequence, interpret as 'x' + head += i; + break; + + default: // 1-3 byte octal escape sequence + --head; + for( i=0; i<3; ++i ) + { + if( head[i] < '0' || head[i] > '7' ) + break; + tbuf[i] = head[i]; + } + tbuf[i] = '\0'; + if( i > 0 ) + c = (char) strtoul( tbuf, NULL, 8 ); + else + c = '\\'; // a goofed octal escape sequence, interpret as '\' + head += i; + break; + } + + curText += c; } - // fall thru - } + else if( *head == '"' ) // end of the non-specctraMode DSN_STRING + { + curTok = DSN_STRING; + ++head; // omit this trailing double quote + goto exit; + } - // check for a terminator - if( isStringTerminator( *head ) ) - { - curTok = DSN_STRING; - ++head; - goto exit; - } + else + curText += *head++; - curText += *head++; - } + } // while - wxString errtxt(_("Un-terminated delimited string") ); - THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() ); - -#else // old code, did not understand nested quotes - ++cur; // skip over the leading delimiter: ",', or $ - - head = cur; - - while( head= limit ) - { + // L_unterminated: wxString 
errtxt(_("Un-terminated delimited string") ); THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() ); } - curText.clear(); - curText.append( cur, head ); + else // specctraMode DSN_STRING + { + ++cur; // skip over the leading delimiter: ",', or $ - ++head; // skip over the trailing delimiter + head = cur; - curTok = DSN_STRING; - goto exit; -#endif + while( head= limit ) + { + wxString errtxt(_("Un-terminated delimited string") ); + THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() ); + } + + curText.clear(); + curText.append( cur, head ); + + ++head; // skip over the trailing delimiter + + curTok = DSN_STRING; + goto exit; + } } // Maybe it is a token we will find in the token table. @@ -1413,7 +1473,6 @@ static const KEYWORD keywords[] = { class DSNTEST : public wxApp { - DSNLEXER* lexer; int nestLevel; diff --git a/common/richio.cpp b/common/richio.cpp index ea91df5a11..0ba0f37aac 100644 --- a/common/richio.cpp +++ b/common/richio.cpp @@ -282,43 +282,50 @@ int OUTPUTFORMATTER::Print( int nestLevel, const char* fmt, ... ) throw( IO_ERRO std::string OUTPUTFORMATTER::Quoted( const std::string& aWrapee ) throw( IO_ERROR ) { - // derived class's notion of what a quote character is - char quote = *GetQuoteChar( "(" ); + static const char quoteThese[] = "\t ()\n\r"; - // Will the string be wrapped based on its interior content? - const char* squote = GetQuoteChar( aWrapee.c_str() ); - - std::string wrapee = aWrapee; // return this - - // Search the interior of the string for 'quote' chars - // and replace them as found with duplicated quotes. - // Note that necessarily any string which has internal quotes will - // also be wrapped in quotes later in this function. 
- for( unsigned i=0; iGetSource().GetData(), diff --git a/pcbnew/specctra.cpp b/pcbnew/specctra.cpp index b63a11ae84..2e36c0b631 100644 --- a/pcbnew/specctra.cpp +++ b/pcbnew/specctra.cpp @@ -2650,7 +2650,7 @@ void SPECCTRA_DB::doFROMTO( FROMTO* growth ) throw( IO_ERROR ) // split apart the s into 3 separate tokens. Do this by // turning off the string delimiter in the lexer. - int old = SetStringDelimiter( 0 ); + char old = SetStringDelimiter( 0 ); if( !IsSymbol(NextTok() ) ) { diff --git a/pcbnew/specctra.h b/pcbnew/specctra.h index 79760e31b0..3f8b5ccefb 100644 --- a/pcbnew/specctra.h +++ b/pcbnew/specctra.h @@ -3798,6 +3798,8 @@ public: session = 0; quote_char += '"'; modulesAreFlipped = false; + + SetSpecctraMode( true ); } virtual ~SPECCTRA_DB()