diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 65c5f9c8d5..3942c8651b 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -4,6 +4,18 @@ KiCad ChangeLog 2010 Please add newer entries at the top, list the date and your name with email address. +2011-Jan-30 UPDATE Dick Hollenbeck +================================================================================ +++all: + * DSNLEXER::NextTok() now uses two separate modes to parse quoted strings. + This gives us the freedom to control our own destiny separate from the + constraints put on us by the Specctra DSN spec. + * Added Documentation/s-expressions.txt to explain all this. + * Enhanced our quoting protocol by moving away from doubling up double quotes + to a C-like escape mechanism. + * Now support multi-line strings, which when properly escaped, can still be + read in as a token originating on a single line. + 2011-Jan-21 UPDATE Wayne Stambaugh ================================================================================ EESchema code refactoring and coding policy naming fixes. diff --git a/Documentation/s-expressions.txt b/Documentation/s-expressions.txt new file mode 100644 index 0000000000..5c39b66c20 --- /dev/null +++ b/Documentation/s-expressions.txt @@ -0,0 +1,89 @@ + +S-Expression Support in Kicad +============================================================================ +Author: Dick Hollenbeck +Date: Jan 2011 + + +An s-expression is a text stream or string, in the same vein as XML, consisting +of a sequence of elements. Each element is either an atom or list. An atom +corresponds to a string, while a list corresponds to an s-expression. 
The +following grammar represents our definition of an s-expression: + +sexpr ::= ( sx ) +sx ::= atom sxtail | sexpr sxtail | NULL +sxtail ::= sx | NULL +atom ::= quoted | value +quoted ::= "ws_string" +value ::= nws_string + +An atom can either be a quoted string, which is a string containing whitespace +surrounded by double quotes, or a non-whitespace string that does not require +surrounding quotes. + +The s-expression syntax used in Kicad uses two quoting/syntax strategies, given +by the needs of the Specctra DSN specification and of our own non-specctra +needs. The Specctra DSN specification is not very clear with regard to quoting +and on top of that there is Freerouter's interpretation, which would actually +supersede anything in the Specctra DSN spec anyway, due to a desire to be +compatible with Freerouter. + +We have our own needs, which go beyond those of the Specctra DSN spec, so we +support the two syntaxes or quoting protocols for quoted atoms: + +1) Specctra quoting protocol (specctraMode) +2) Kicad quoting protocol (non-specctraMode) + +We can control our own destiny better by having a separately defined mode for +non-Specctra DSN files. + +To summarize, in specctraMode Freerouter dictates to us what we need to do. In +non-specctraMode, which can be thought of as Kicad mode, we have our own quoting +protocol and can make changes without breaking the specctraMode. + +There needs to be agreement between how a file is saved, and how a file is read +back in, in either mode, to fulfill the round-tripping requirements. A file +written using one mode may not necessarily be readable using the other mode, +although it might be. Just don't count on it. + + +In Kicad mode: + +OUTPUTFORMATTER::Quoted() is the tool to wrap s-expression atoms. +DSNLEXER::NextTok() is basically the inverse function, and reads tokens back in. +These two must agree, so that what is written out comes back in un-altered. 
+ +The decision to wrap the string or not is left to the Quoted() function. If the +string is wrapped, it will also escape internal double quotes, \n's and \r's. +Any null string is wrapped in quotes, and so is any string which starts with +'#', so that it is not confused with an s-expression comment. + + +Kicad S-expression Syntax and Quoting Protocol (non-specctraMode): +================================================================== + +*) All Kicad s-expression files are saved using a UTF-8 encoding and should +support any international characters in the atoms. Some atoms are considered +keywords, and constitute a grammar superimposed on the s-expressions. + +*) All keywords are ASCII and lowercase. International characters are not to be +used here. + +*) DSNLEXER::NextTok() requires that any token be on a single line of input. If +you want to save a multi-line string, Quoted() will automatically escape the \n +or \r for you and put the output on a single line. It should round-trip fine. + +*) There can be escape sequences in a quoted string only. Escape sequences allow +foreign tools to generate byte patterns in the input stream. C style hex escapes +of 1 or 2 digits are supported, and so are octal escapes of 1 to 3 digits. See DSNLEXER::NextTok() +for the full list of escape sequences, by searching file dsnlexer.cpp for the +string "ESCAPE SEQUENCES". Any use of the escape mechanism must still produce +UTF-8 encoded text after the escape handling is applied. + +*) Just because an escape sequence is supported on input does not mean that +OUTPUTFORMATTER::Quoted() must generate such an escape sequence for output. For +example, having true tabs in the s-expression file is OK, so a tab will not be +escaped on output. Other similar cases exist. + +*) Backslash is the escape byte. 
+ diff --git a/common/dsnlexer.cpp b/common/dsnlexer.cpp index de4639c225..5f027196ab 100644 --- a/common/dsnlexer.cpp +++ b/common/dsnlexer.cpp @@ -54,7 +54,8 @@ void DSNLEXER::init() curTok = DSN_NONE; stringDelimiter = '"'; - space_in_quoted_tokens = true; + specctraMode = false; + space_in_quoted_tokens = false; commentsAreTokens = false; } @@ -107,6 +108,20 @@ DSNLEXER::~DSNLEXER() } } +void DSNLEXER::SetSpecctraMode( bool aMode ) +{ + specctraMode = aMode; + if( aMode ) + { + // specctra mode defaults, some of which can still be changed in this mode. + space_in_quoted_tokens = true; + } + else + { + space_in_quoted_tokens = false; + stringDelimiter = '"'; + } +} void DSNLEXER::PushReader( LINE_READER* aLineReader ) { @@ -479,74 +494,119 @@ L_read: // else it was something like +5V, fall through below } - // a quoted string + // a quoted string, will return DSN_STRING if( *cur == stringDelimiter ) { - // New code, understands nested quotes, and deliberately restricts - // strings to a single line. Still strips off leading and trailing - // quotes, and now removes internal doubled up quotes -#if 1 - head = cur; - - // copy the token, character by character so we can remove doubled up quotes. - curText.clear(); - - while( head < limit ) + // Non-specctraMode, understands and deciphers escaped \, \r, \n, and \". + // Strips off leading and trailing double quotes + if( !specctraMode ) { - if( *head==stringDelimiter ) + // copy the token, character by character so we can remove doubled up quotes. 
+ curText.clear(); + + ++cur; // skip over the leading delimiter, which is always " in non-specctraMode + + head = cur; + + while( head= limit ) + break; // throw exception at L_unterminated + + switch( *head++ ) + { + case '"': + case '\\': c = head[-1]; break; + case 'a': c = '\x07'; break; + case 'b': c = '\x08'; break; + case 'f': c = '\x0c'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\x09'; break; + case 'v': c = '\x0b'; break; + + case 'x': // 1 or 2 byte hex escape sequence + for( i=0; i<2; ++i ) + { + if( !isxdigit( head[i] ) ) + break; + tbuf[i] = head[i]; + } + tbuf[i] = '\0'; + if( i > 0 ) + c = (char) strtoul( tbuf, NULL, 16 ); + else + c = 'x'; // a goofed hex escape sequence, interpret as 'x' + head += i; + break; + + default: // 1-3 byte octal escape sequence + --head; + for( i=0; i<3; ++i ) + { + if( head[i] < '0' || head[i] > '7' ) + break; + tbuf[i] = head[i]; + } + tbuf[i] = '\0'; + if( i > 0 ) + c = (char) strtoul( tbuf, NULL, 8 ); + else + c = '\\'; // a goofed octal escape sequence, interpret as '\' + head += i; + break; + } + + curText += c; } - // fall thru - } + else if( *head == '"' ) // end of the non-specctraMode DSN_STRING + { + curTok = DSN_STRING; + ++head; // omit this trailing double quote + goto exit; + } - // check for a terminator - if( isStringTerminator( *head ) ) - { - curTok = DSN_STRING; - ++head; - goto exit; - } + else + curText += *head++; - curText += *head++; - } + } // while - wxString errtxt(_("Un-terminated delimited string") ); - THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() ); - -#else // old code, did not understand nested quotes - ++cur; // skip over the leading delimiter: ",', or $ - - head = cur; - - while( head= limit ) - { + // L_unterminated: wxString errtxt(_("Un-terminated delimited string") ); THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() ); } - curText.clear(); - curText.append( cur, head ); + else 
// specctraMode DSN_STRING + { + ++cur; // skip over the leading delimiter: ",', or $ - ++head; // skip over the trailing delimiter + head = cur; - curTok = DSN_STRING; - goto exit; -#endif + while( head= limit ) + { + wxString errtxt(_("Un-terminated delimited string") ); + THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() ); + } + + curText.clear(); + curText.append( cur, head ); + + ++head; // skip over the trailing delimiter + + curTok = DSN_STRING; + goto exit; + } } // Maybe it is a token we will find in the token table. @@ -1413,7 +1473,6 @@ static const KEYWORD keywords[] = { class DSNTEST : public wxApp { - DSNLEXER* lexer; int nestLevel; diff --git a/common/richio.cpp b/common/richio.cpp index ea91df5a11..0ba0f37aac 100644 --- a/common/richio.cpp +++ b/common/richio.cpp @@ -282,43 +282,50 @@ int OUTPUTFORMATTER::Print( int nestLevel, const char* fmt, ... ) throw( IO_ERRO std::string OUTPUTFORMATTER::Quoted( const std::string& aWrapee ) throw( IO_ERROR ) { - // derived class's notion of what a quote character is - char quote = *GetQuoteChar( "(" ); + static const char quoteThese[] = "\t ()\n\r"; - // Will the string be wrapped based on its interior content? - const char* squote = GetQuoteChar( aWrapee.c_str() ); - - std::string wrapee = aWrapee; // return this - - // Search the interior of the string for 'quote' chars - // and replace them as found with duplicated quotes. - // Note that necessarily any string which has internal quotes will - // also be wrapped in quotes later in this function. - for( unsigned i=0; iGetSource().GetData(), diff --git a/pcbnew/specctra.cpp b/pcbnew/specctra.cpp index b63a11ae84..2e36c0b631 100644 --- a/pcbnew/specctra.cpp +++ b/pcbnew/specctra.cpp @@ -2650,7 +2650,7 @@ void SPECCTRA_DB::doFROMTO( FROMTO* growth ) throw( IO_ERROR ) // split apart the s into 3 separate tokens. Do this by // turning off the string delimiter in the lexer. 
- int old = SetStringDelimiter( 0 ); + char old = SetStringDelimiter( 0 ); if( !IsSymbol(NextTok() ) ) { diff --git a/pcbnew/specctra.h b/pcbnew/specctra.h index 79760e31b0..3f8b5ccefb 100644 --- a/pcbnew/specctra.h +++ b/pcbnew/specctra.h @@ -3798,6 +3798,8 @@ public: session = 0; quote_char += '"'; modulesAreFlipped = false; + + SetSpecctraMode( true ); } virtual ~SPECCTRA_DB()