/* kicad/markdown2html/html_formatter/html_smartypants.c -- 510 lines, 13 KiB, C */

/*
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "buffer.h"
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#if defined(_WIN32)
#define snprintf _snprintf
#endif
/*
 * Quote-tracking state threaded through every callback during one
 * sdhtml_smartypants() run. Both flags start at 0 and are toggled by
 * smartypants_quotes() each time it emits a curly quote.
 */
struct smartypants_data
{
int in_squote; /* non-zero while a single-quote span is open */
int in_dquote; /* non-zero while a double-quote span is open */
};
/*
 * Forward declarations of the per-character callbacks.
 *
 * All callbacks share one signature, as used by sdhtml_smartypants():
 *   ob            - output buffer the callback appends to
 *   smrt          - open/closed quote state for the current run
 *   previous_char - the input byte just before text[0], or 0 at the start
 *   text, size    - the remaining input, starting at the trigger byte
 *
 * The return value is added to the dispatcher's index before its own
 * increment, i.e. it is the number of input bytes consumed in addition
 * to the trigger byte text[0].
 */
static size_t smartypants_cb__ltag( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__dquote( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__amp( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__period( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__number( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__dash( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__parens( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__squote( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__backtick( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__escape( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
/*
 * Callback dispatch table. The index is the action code stored in
 * smartypants_cb_chars for the trigger byte; slot 0 is the "no action"
 * code and is never called by the dispatcher.
 */
static size_t( *smartypants_cb_ptrs[] )
( struct buf*, struct smartypants_data*, uint8_t, const uint8_t*, size_t ) =
{
    [0]  = NULL,
    [1]  = smartypants_cb__dash,
    [2]  = smartypants_cb__parens,
    [3]  = smartypants_cb__squote,
    [4]  = smartypants_cb__dquote,
    [5]  = smartypants_cb__amp,
    [6]  = smartypants_cb__period,
    [7]  = smartypants_cb__number,
    [8]  = smartypants_cb__ltag,
    [9]  = smartypants_cb__backtick,
    [10] = smartypants_cb__escape,
};
/*
 * Maps every possible input byte to an action code, i.e. an index into
 * smartypants_cb_ptrs. Bytes that are not listed map to 0, which means
 * "copy through unchanged". The character constants are expected to
 * have their ASCII values, matching the positions used by the
 * original positional table.
 */
static const uint8_t smartypants_cb_chars[256] =
{
    ['-']  = 1,     /* dashes */
    ['(']  = 2,     /* (c), (r), (tm) */
    ['\''] = 3,     /* single quotes and contractions */
    ['"']  = 4,     /* double quotes */
    ['&']  = 5,     /* &quot; and &#0; */
    ['.']  = 6,     /* ellipsis */
    ['1']  = 7,     /* 1/2, 1/4 */
    ['3']  = 7,     /* 3/4 */
    ['<']  = 8,     /* HTML tags */
    ['`']  = 9,     /* double backticks */
    ['\\'] = 10,    /* backslash escapes */
};
/*
 * Returns 1 when c can delimit a word: the 0 sentinel used for
 * "no neighbouring character", whitespace, or punctuation.
 * Returns 0 otherwise.
 */
static inline int word_boundary( uint8_t c )
{
    if( c == 0 )
        return 1;

    if( isspace( c ) )
        return 1;

    return ispunct( c ) != 0;
}
/*
 * Tries to emit a curly-quote entity and flip the open/closed state.
 *
 * A closing quote (state currently open) is only emitted when next_char
 * is a word boundary; an opening quote (state currently closed) only
 * when previous_char is one.
 *
 *   quote   - 's' or 'd', selecting &[lr]squo; or &[lr]dquo;
 *   is_open - in/out flag for the quote kind being processed
 *
 * Returns 1 if an entity was appended to ob and *is_open was toggled,
 * 0 if nothing was written.
 */
static int smartypants_quotes( struct buf* ob,
                               uint8_t previous_char,
                               uint8_t next_char,
                               uint8_t quote,
                               int* is_open )
{
    char          entity[8];    /* "&" + side + kind + "quo;" + NUL */
    const int     closing   = ( *is_open != 0 );
    const uint8_t neighbour = closing ? next_char : previous_char;

    if( !word_boundary( neighbour ) )
        return 0;

    snprintf( entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote );
    *is_open = !closing;
    bufputs( ob, entity );
    return 1;
}
/*
 * Callback for the single quote character (').
 *
 * Tried in this order:
 *   1. ''  is handled as a double quote (consumes the second quote).
 *   2. 's 't 'm 'd followed by a word boundary or end of input
 *      become an apostrophe (&rsquo;).
 *   3. 're 'll 've followed by a word boundary or end of input
 *      become an apostrophe (&rsquo;).
 *   4. Otherwise an opening/closing single curly quote, or the literal
 *      character when smartypants_quotes() declines.
 *
 * Every look-ahead index is checked against size first, so no byte
 * beyond text[size - 1] is ever read; end of input counts as a word
 * boundary.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__squote( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size >= 2 )
    {
        uint8_t t1 = tolower( text[1] );

        if( t1 == '\'' )
        {
            if( smartypants_quotes( ob, previous_char, size >= 3 ? text[2] : 0, 'd',
                                    &smrt->in_dquote ) )
                return 1;
        }

        /* text[2] only exists when size >= 3; size == 2 means the
         * contraction letter is the last byte of the input. */
        if( (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd')
            && ( size == 2 || word_boundary( text[2] ) ) )
        {
            BUFPUTSL( ob, "&rsquo;" );
            return 0;
        }

        if( size >= 3 )
        {
            uint8_t t2 = tolower( text[2] );

            /* text[3] only exists when size >= 4. */
            if( ( (t1 == 'r' && t2 == 'e')
                  || (t1 == 'l' && t2 == 'l')
                  || (t1 == 'v' && t2 == 'e') )
                && ( size == 3 || word_boundary( text[3] ) ) )
            {
                BUFPUTSL( ob, "&rsquo;" );
                return 0;
            }
        }
    }

    /* text[1] only exists when size > 1. */
    if( smartypants_quotes( ob, previous_char, size > 1 ? text[1] : 0, 's', &smrt->in_squote ) )
        return 0;

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for '('. Recognises, case-insensitively:
 *   (c)  -> &copy;    (returns 2)
 *   (r)  -> &reg;     (returns 2)
 *   (tm) -> &trade;   (returns 3)
 * Anything else copies text[0] through and returns 0.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__parens( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size >= 3 )
    {
        const uint8_t first  = tolower( text[1] );
        const uint8_t second = tolower( text[2] );

        if( second == ')' )
        {
            /* One-letter symbols: "(c)" and "(r)". */
            switch( first )
            {
            case 'c':
                BUFPUTSL( ob, "&copy;" );
                return 2;

            case 'r':
                BUFPUTSL( ob, "&reg;" );
                return 2;

            default:
                break;
            }
        }
        else if( first == 't' && second == 'm' && size >= 4 && text[3] == ')' )
        {
            BUFPUTSL( ob, "&trade;" );
            return 3;
        }
    }

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for '-'.
 *   ---  -> &mdash;   (returns 2)
 *   --   -> &ndash;   (returns 1)
 * A lone dash is copied through and 0 is returned.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__dash( struct buf* ob,
                                    struct smartypants_data* smrt,
                                    uint8_t previous_char,
                                    const uint8_t* text,
                                    size_t size )
{
    /* Length of the dash run starting at text[0], capped at 3. */
    size_t run = 1;

    while( run < 3 && run < size && text[run] == '-' )
        run++;

    if( run == 3 )
    {
        BUFPUTSL( ob, "&mdash;" );
        return 2;
    }

    if( run == 2 )
    {
        BUFPUTSL( ob, "&ndash;" );
        return 1;
    }

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for '&'.
 *   &quot;  is offered to smartypants_quotes() as a double quote; when
 *           accepted the whole entity is consumed (returns 5).
 *   &#0;    is consumed without producing any output (returns 3).
 * In every other case a literal '&' is written and 0 is returned.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__amp( struct buf* ob,
                                   struct smartypants_data* smrt,
                                   uint8_t previous_char,
                                   const uint8_t* text,
                                   size_t size )
{
    const int is_quot = ( size >= 6 ) && ( memcmp( text, "&quot;", 6 ) == 0 );

    if( is_quot )
    {
        const uint8_t after = ( size >= 7 ) ? text[6] : 0;

        if( smartypants_quotes( ob, previous_char, after, 'd', &smrt->in_dquote ) )
            return 5;
    }
    else if( size >= 4 && memcmp( text, "&#0;", 4 ) == 0 )
    {
        return 3;
    }

    bufputc( ob, '&' );
    return 0;
}
/*
 * Callback for '.'.
 *   "..."    -> &hellip;   (returns 2)
 *   ". . ."  -> &hellip;   (returns 4)
 * A lone period is copied through and 0 is returned.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__period( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    /* Extra bytes belonging to a recognised ellipsis, 0 if none. */
    size_t extra = 0;

    if( size >= 3 && memcmp( text + 1, "..", 2 ) == 0 )
        extra = 2;
    else if( size >= 5 && memcmp( text + 1, " . .", 4 ) == 0 )
        extra = 4;

    if( extra == 0 )
    {
        bufputc( ob, text[0] );
        return 0;
    }

    BUFPUTSL( ob, "&hellip;" );
    return extra;
}
/*
 * Callback for '`'.
 *
 * A pair of backticks (``) is offered to smartypants_quotes() as a
 * double quote; when accepted, both backticks are consumed (returns 1).
 *
 * In every other case the backtick is copied to the output unchanged.
 * Without this the character would be dropped, because the dispatcher
 * never copies a trigger byte itself.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__backtick( struct buf* ob,
                                        struct smartypants_data* smrt,
                                        uint8_t previous_char,
                                        const uint8_t* text,
                                        size_t size )
{
    if( size >= 2 && text[1] == '`' )
    {
        if( smartypants_quotes( ob, previous_char, size >= 3 ? text[2] : 0, 'd',
                                &smrt->in_dquote ) )
            return 1;
    }

    /* No substitution: keep the literal backtick. */
    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for the digits '1' and '3': converts simple fractions that
 * start at a word boundary.
 *   1/2              -> &frac12;
 *   1/4  (or 1/4th)  -> &frac14;
 *   3/4  (or 3/4ths) -> &frac34;
 * The fraction must be followed by end of input, a word boundary, or
 * the ordinal suffix shown above (matched case-insensitively). The
 * suffix itself is not consumed.
 *
 * Returns 2 when a fraction was replaced, otherwise copies text[0]
 * through and returns 0.
 */
static size_t smartypants_cb__number( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size >= 3 && text[1] == '/' && word_boundary( previous_char ) )
    {
        const uint8_t numerator   = text[0];
        const uint8_t denominator = text[2];

        /* text[3] is only inspected when it exists. */
        const int ends_word = ( size == 3 ) || word_boundary( text[3] );

        if( numerator == '1' && denominator == '2' )
        {
            if( ends_word )
            {
                BUFPUTSL( ob, "&frac12;" );
                return 2;
            }
        }
        else if( numerator == '1' && denominator == '4' )
        {
            if( ends_word
                || ( size >= 5 && tolower( text[3] ) == 't' && tolower( text[4] ) == 'h' ) )
            {
                BUFPUTSL( ob, "&frac14;" );
                return 2;
            }
        }
        else if( numerator == '3' && denominator == '4' )
        {
            if( ends_word
                || ( size >= 6 && tolower( text[3] ) == 't' && tolower( text[4] ) == 'h'
                     && tolower( text[5] ) == 's' ) )
            {
                BUFPUTSL( ob, "&frac34;" );
                return 2;
            }
        }
    }

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for the double quote character (").
 *
 * Emits an opening or closing curly double quote through
 * smartypants_quotes(); when that declines, writes "&quot;" instead.
 * The character after the quote is only read when it exists
 * (size > 1); end of input is passed as 0, which counts as a word
 * boundary.
 *
 * Always returns 0: only text[0] is consumed.
 */
static size_t smartypants_cb__dquote( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    const uint8_t next_char = size > 1 ? text[1] : 0;

    if( !smartypants_quotes( ob, previous_char, next_char, 'd', &smrt->in_dquote ) )
        BUFPUTSL( ob, "&quot;" );

    return 0;
}
/*
 * Callback for '<': copies an HTML tag to the output verbatim so that
 * no substitutions are applied inside it.
 *
 * When sdhtml_is_tag() reports the tag as the opening tag of one of
 * skip_tags, everything up to and including the first matching closing
 * tag is copied verbatim as well.
 *
 * If the input ends before the terminating '>' is found, the rest of
 * the input is copied and nothing past text[size - 1] is read.
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__ltag( struct buf* ob,
                                    struct smartypants_data* smrt,
                                    uint8_t previous_char,
                                    const uint8_t* text,
                                    size_t size )
{
    static const char* skip_tags[] =
    {
        "pre", "code", "var", "samp", "kbd", "math", "script", "style"
    };
    static const size_t skip_tags_count = sizeof( skip_tags ) / sizeof( skip_tags[0] );

    size_t tag, i = 0;

    /* Find the end of the current tag. */
    while( i < size && text[i] != '>' )
        i++;

    for( tag = 0; tag < skip_tags_count; ++tag )
    {
        if( sdhtml_is_tag( text, size, skip_tags[tag] ) == HTML_TAG_OPEN )
            break;
    }

    if( tag < skip_tags_count )
    {
        /* Scan forward, one '<' at a time, for the matching close tag. */
        for( ; ; )
        {
            while( i < size && text[i] != '<' )
                i++;

            if( i == size )
                break;

            if( sdhtml_is_tag( text + i, size - i, skip_tags[tag] ) == HTML_TAG_CLOSE )
                break;

            i++;
        }

        /* Move to the '>' that ends the close tag. */
        while( i < size && text[i] != '>' )
            i++;
    }

    if( i >= size )
    {
        /* Unterminated tag: text[i] does not exist, so copy exactly the
         * bytes that remain instead of i + 1. */
        bufput( ob, text, size );
        return size ? size - 1 : 0;
    }

    bufput( ob, text, i + 1 );
    return i;
}
/*
 * Callback for the backslash.
 *
 * A backslash followed by one of  \ " ' . - `  writes that character
 * literally (no substitution) and consumes both bytes (returns 1).
 * A backslash followed by anything else, or a backslash that is the
 * last byte of the input, is copied through unchanged (returns 0).
 *
 * Returns the number of bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__escape( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size < 2 )
    {
        /* Trailing backslash: nothing to escape, keep it in the output
         * rather than silently dropping it. */
        bufputc( ob, '\\' );
        return 0;
    }

    switch( text[1] )
    {
    case '\\':
    case '"':
    case '\'':
    case '.':
    case '-':
    case '`':
        bufputc( ob, text[1] );
        return 1;

    default:
        bufputc( ob, '\\' );
        return 0;
    }
}
/*
 * Compiled out (#if 0): a table-driven listing of the substitutions,
 * kept for reference only. Nothing in the live code reads it, and it
 * does not match the callbacks exactly: for example it lists "--" as
 * &mdash;, whereas smartypants_cb__dash() emits &mdash; for "---" and
 * &ndash; for "--".
 */
#if 0
static struct
{
uint8_t c0;
const uint8_t* pattern;
const uint8_t* entity;
int skip;
}
smartypants_subs[] =
{
{ '\'', "'s>", "&rsquo;", 0 },
{ '\'', "'t>", "&rsquo;", 0 },
{ '\'', "'re>", "&rsquo;", 0 },
{ '\'', "'ll>", "&rsquo;", 0 },
{ '\'', "'ve>", "&rsquo;", 0 },
{ '\'', "'m>", "&rsquo;", 0 },
{ '\'', "'d>", "&rsquo;", 0 },
{ '-', "--", "&mdash;", 1 },
{ '-', "<->", "&ndash;", 0 },
{ '.', "...", "&hellip;", 2 },
{ '.', ". . .", "&hellip;", 4 },
{ '(', "(c)", "&copy;", 2 },
{ '(', "(r)", "&reg;", 2 },
{ '(', "(tm)", "&trade;", 3 },
{ '3', "<3/4>", "&frac34;", 2 },
{ '3', "<3/4ths>", "&frac34;", 2 },
{ '1', "<1/2>", "&frac12;", 2 },
{ '1', "<1/4>", "&frac14;", 2 },
{ '1', "<1/4th>", "&frac14;", 2 },
{ '&', "&#0;", 0, 3 },
};
#endif
void sdhtml_smartypants( struct buf* ob, const uint8_t* text, size_t size )
{
size_t i;
struct smartypants_data smrt = { 0, 0 };
if( !text )
return;
bufgrow( ob, size );
for( i = 0; i < size; ++i )
{
size_t org;
uint8_t action = 0;
org = i;
while( i < size && (action = smartypants_cb_chars[text[i]]) == 0 )
i++;
if( i > org )
bufput( ob, text + org, i - org );
if( i < size )
{
i += smartypants_cb_ptrs[(int) action]
( ob, &smrt, i ? text[i - 1] : 0, text + i, size - i );
}
}
}