510 lines
13 KiB
C
510 lines
13 KiB
C
|
/*
 * Copyright (c) 2011, Vicent Marti
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
|
||
|
|
||
|
#include "buffer.h"
|
||
|
#include "html.h"
|
||
|
|
||
|
#include <string.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <ctype.h>
|
||
|
|
||
|
/*
 * Microsoft C runtimes older than Visual Studio 2015 only provide
 * _snprintf. Newer runtimes ship a conforming snprintf and refuse a macro
 * redefinition of it, so the alias is restricted to the old compilers.
 */
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
|
||
|
|
||
|
/*
 * Quote state shared by the callbacks during one smartypants pass.
 * Each flag is non-zero while a quote of that kind has been opened and
 * not yet closed; smartypants_quotes() flips it on every conversion.
 */
struct smartypants_data {
    int in_squote; /* single-quote state */
    int in_dquote; /* double-quote state */
};
|
||
|
|
||
|
/*
 * Forward declarations of the per-character callbacks, listed in the
 * order of their slots in smartypants_cb_ptrs.  All share one signature:
 * output buffer, quote state, the byte preceding text[0] (0 at the start
 * of input), the input starting at the trigger byte, and the number of
 * bytes remaining from text[0] onwards.
 */
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt,
                                   uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt,
                                  uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt,
                                   uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt,
                                       uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size);
|
||
|
|
||
|
static size_t( *smartypants_cb_ptrs[] )
|
||
|
( struct buf*, struct smartypants_data*, uint8_t, const uint8_t*, size_t ) =
|
||
|
{
|
||
|
NULL, /* 0 */
|
||
|
smartypants_cb__dash, /* 1 */
|
||
|
smartypants_cb__parens, /* 2 */
|
||
|
smartypants_cb__squote, /* 3 */
|
||
|
smartypants_cb__dquote, /* 4 */
|
||
|
smartypants_cb__amp, /* 5 */
|
||
|
smartypants_cb__period, /* 6 */
|
||
|
smartypants_cb__number, /* 7 */
|
||
|
smartypants_cb__ltag, /* 8 */
|
||
|
smartypants_cb__backtick, /* 9 */
|
||
|
smartypants_cb__escape, /* 10 */
|
||
|
};
|
||
|
|
||
|
/*
 * Maps every possible input byte to an action code, i.e. an index into
 * smartypants_cb_ptrs.  Bytes that are not listed stay 0 and are copied
 * through unchanged by sdhtml_smartypants().
 */
static const uint8_t smartypants_cb_chars[256] = {
    ['-']  = 1,  /* dashes */
    ['(']  = 2,  /* (c), (r), (tm) */
    ['\''] = 3,  /* single quotes and contractions */
    ['"']  = 4,  /* double quotes */
    ['&']  = 5,  /* entities already present in the input */
    ['.']  = 6,  /* ellipsis */
    ['1']  = 7,  /* fractions starting with 1 */
    ['3']  = 7,  /* fractions starting with 3 */
    ['<']  = 8,  /* HTML tags */
    ['`']  = 9,  /* backtick quotes */
    ['\\'] = 10, /* backslash escapes */
};
|
||
|
|
||
|
/*
 * Returns 1 when `c` can delimit a word: the 0 sentinel used for
 * "no character" (start or end of input), whitespace, or punctuation
 * as classified by <ctype.h>.  Returns 0 otherwise.
 */
static inline int word_boundary(uint8_t c)
{
    if (c == 0)
        return 1;

    return isspace(c) || ispunct(c);
}
|
||
|
|
||
|
|
||
|
/*
 * Tries to turn a quote into an opening or closing curly-quote entity.
 *
 * ob            - output buffer the entity is appended to
 * previous_char - byte before the quote (0 when there is none)
 * next_char     - byte after the quote (0 when there is none)
 * quote         - 's' or 'd'; becomes the third character of the entity
 * is_open       - state flag for this quote kind; flipped on success
 *
 * A closing quote is only produced when a word boundary follows it, an
 * opening quote only when a word boundary precedes it.  Returns 1 when an
 * entity was written, 0 when nothing was written and the state is
 * unchanged.
 */
static int smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char,
                              uint8_t quote, int *is_open)
{
    char entity[8]; /* "&ldquo;" and friends: 7 characters plus the terminator */
    const int closing = (*is_open != 0);

    if (closing) {
        if (!word_boundary(next_char))
            return 0;
    } else if (!word_boundary(previous_char)) {
        return 0;
    }

    snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
    *is_open = !closing;
    bufputs(ob, entity);
    return 1;
}
|
||
|
|
||
|
|
||
|
static size_t smartypants_cb__squote( struct buf* ob,
|
||
|
struct smartypants_data* smrt,
|
||
|
uint8_t previous_char,
|
||
|
const uint8_t* text,
|
||
|
size_t size )
|
||
|
{
|
||
|
if( size >= 2 )
|
||
|
{
|
||
|
uint8_t t1 = tolower( text[1] );
|
||
|
|
||
|
if( t1 == '\'' )
|
||
|
{
|
||
|
if( smartypants_quotes( ob, previous_char, size >= 3 ? text[2] : 0, 'd',
|
||
|
&smrt->in_dquote ) )
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
if( (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd')
|
||
|
&& ( size == 3 || word_boundary( text[2] ) ) )
|
||
|
{
|
||
|
BUFPUTSL( ob, "’" );
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if( size >= 3 )
|
||
|
{
|
||
|
uint8_t t2 = tolower( text[2] );
|
||
|
|
||
|
if( ( (t1 == 'r' && t2 == 'e')
|
||
|
|| (t1 == 'l' && t2 == 'l')
|
||
|
|| (t1 == 'v' && t2 == 'e') )
|
||
|
&& ( size == 4 || word_boundary( text[3] ) ) )
|
||
|
{
|
||
|
BUFPUTSL( ob, "’" );
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if( smartypants_quotes( ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote ) )
|
||
|
return 0;
|
||
|
|
||
|
bufputc( ob, text[0] );
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Callback for an opening parenthesis.
 *
 * Replaces "(c)", "(r)" and "(tm)" (letters matched case-insensitively)
 * with the &copy;, &reg; and &trade; entities, emitted as ASCII entity
 * text like the quote entities written by smartypants_quotes().  Any
 * other parenthesis is copied through unchanged.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3) {
        uint8_t t1 = tolower(text[1]);
        uint8_t t2 = tolower(text[2]);

        if (t1 == 'c' && t2 == ')') {
            BUFPUTSL(ob, "&copy;");
            return 2;
        }

        if (t1 == 'r' && t2 == ')') {
            BUFPUTSL(ob, "&reg;");
            return 2;
        }

        if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
            BUFPUTSL(ob, "&trade;");
            return 3;
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||
|
|
||
|
|
||
|
/*
 * Callback for a dash.
 *
 * Three dashes in a row become &mdash; and two become &ndash;, both
 * emitted as ASCII entity text.  A lone dash is copied through unchanged.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt,
                                   uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3 && text[1] == '-' && text[2] == '-') {
        BUFPUTSL(ob, "&mdash;");
        return 2;
    }

    if (size >= 2 && text[1] == '-') {
        BUFPUTSL(ob, "&ndash;");
        return 1;
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||
|
|
||
|
|
||
|
static size_t smartypants_cb__amp( struct buf* ob,
|
||
|
struct smartypants_data* smrt,
|
||
|
uint8_t previous_char,
|
||
|
const uint8_t* text,
|
||
|
size_t size )
|
||
|
{
|
||
|
if( size >= 6 && memcmp( text, """, 6 ) == 0 )
|
||
|
{
|
||
|
if( smartypants_quotes( ob, previous_char, size >= 7 ? text[6] : 0, 'd',
|
||
|
&smrt->in_dquote ) )
|
||
|
return 5;
|
||
|
}
|
||
|
|
||
|
if( size >= 4 && memcmp( text, "�", 4 ) == 0 )
|
||
|
return 3;
|
||
|
|
||
|
bufputc( ob, '&' );
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Callback for a period.
 *
 * "..." and ". . ." are both replaced by &hellip; (emitted as ASCII
 * entity text).  A lone period is copied through unchanged.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3 && text[1] == '.' && text[2] == '.') {
        BUFPUTSL(ob, "&hellip;");
        return 2;
    }

    if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
        BUFPUTSL(ob, "&hellip;");
        return 4;
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||
|
|
||
|
|
||
|
static size_t smartypants_cb__backtick( struct buf* ob,
|
||
|
struct smartypants_data* smrt,
|
||
|
uint8_t previous_char,
|
||
|
const uint8_t* text,
|
||
|
size_t size )
|
||
|
{
|
||
|
if( size >= 2 && text[1] == '`' )
|
||
|
{
|
||
|
if( smartypants_quotes( ob, previous_char, size >= 3 ? text[2] : 0, 'd',
|
||
|
&smrt->in_dquote ) )
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Callback for the digits '1' and '3', the possible starts of a fraction.
 *
 * When the digit follows a word boundary, "1/2", "1/4" and "3/4" are
 * replaced by &frac12;, &frac14; and &frac34; (ASCII entity text),
 * provided the fraction ends the input or is followed by a word boundary.
 * "1/4" may also be followed by "th" and "3/4" by "ths"; that suffix is
 * not consumed here and is copied by the main loop afterwards.
 * Any other digit is copied through unchanged.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (word_boundary(previous_char) && size >= 3) {
        if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
            if (size == 3 || word_boundary(text[3])) {
                BUFPUTSL(ob, "&frac12;");
                return 2;
            }
        }

        if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
            if (size == 3 || word_boundary(text[3])
                || (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
                BUFPUTSL(ob, "&frac14;");
                return 2;
            }
        }

        if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
            if (size == 3 || word_boundary(text[3])
                || (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h'
                    && tolower(text[5]) == 's')) {
                BUFPUTSL(ob, "&frac34;");
                return 2;
            }
        }
    }

    bufputc(ob, text[0]);
    return 0;
}
|
||
|
|
||
|
|
||
|
static size_t smartypants_cb__dquote( struct buf* ob,
|
||
|
struct smartypants_data* smrt,
|
||
|
uint8_t previous_char,
|
||
|
const uint8_t* text,
|
||
|
size_t size )
|
||
|
{
|
||
|
if( !smartypants_quotes( ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote ) )
|
||
|
BUFPUTSL( ob, """ );
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
static size_t smartypants_cb__ltag( struct buf* ob,
|
||
|
struct smartypants_data* smrt,
|
||
|
uint8_t previous_char,
|
||
|
const uint8_t* text,
|
||
|
size_t size )
|
||
|
{
|
||
|
static const char* skip_tags[] =
|
||
|
{
|
||
|
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
||
|
};
|
||
|
static const size_t skip_tags_count = 8;
|
||
|
|
||
|
size_t tag, i = 0;
|
||
|
|
||
|
while( i < size && text[i] != '>' )
|
||
|
i++;
|
||
|
|
||
|
for( tag = 0; tag < skip_tags_count; ++tag )
|
||
|
{
|
||
|
if( sdhtml_is_tag( text, size, skip_tags[tag] ) == HTML_TAG_OPEN )
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if( tag < skip_tags_count )
|
||
|
{
|
||
|
for( ; ; )
|
||
|
{
|
||
|
while( i < size && text[i] != '<' )
|
||
|
i++;
|
||
|
|
||
|
if( i == size )
|
||
|
break;
|
||
|
|
||
|
if( sdhtml_is_tag( text + i, size - i, skip_tags[tag] ) == HTML_TAG_CLOSE )
|
||
|
break;
|
||
|
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
while( i < size && text[i] != '>' )
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
bufput( ob, text, i + 1 );
|
||
|
return i;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Callback for a backslash.
 *
 * A backslash followed by one of \ " ' . - ` emits that character
 * literally (without the backslash) and consumes it, so it never reaches
 * the other callbacks.  In every other case, including a backslash that
 * is the last byte of the input, the backslash itself is copied through.
 *
 * Returns the number of input bytes consumed in addition to text[0].
 */
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt,
                                     uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 2) {
        switch (text[1]) {
        case '\\':
        case '"':
        case '\'':
        case '.':
        case '-':
        case '`':
            bufputc(ob, text[1]);
            return 1;

        default:
            break;
        }
    }

    bufputc(ob, '\\');
    return 0;
}
|
||
|
|
||
|
|
||
|
/*
 * Disabled (never compiled): a table-driven description of the same
 * substitutions the callbacks implement.  Each row holds the trigger
 * byte, a pattern, the replacement entity and a skip count.
 * NOTE(review): '<' and '>' inside the patterns presumably mark word
 * boundaries; nothing in this file reads the table, so this cannot be
 * confirmed here.
 */
#if 0
static struct
{
    uint8_t c0;
    const uint8_t *pattern;
    const uint8_t *entity;
    int skip;
}
smartypants_subs[] =
{
    { '\'', "'s>",      "&rsquo;",  0 },
    { '\'', "'t>",      "&rsquo;",  0 },
    { '\'', "'re>",     "&rsquo;",  0 },
    { '\'', "'ll>",     "&rsquo;",  0 },
    { '\'', "'ve>",     "&rsquo;",  0 },
    { '\'', "'m>",      "&rsquo;",  0 },
    { '\'', "'d>",      "&rsquo;",  0 },
    { '-',  "--",       "&mdash;",  1 },
    { '-',  "<->",      "&ndash;",  0 },
    { '.',  "...",      "&hellip;", 2 },
    { '.',  ". . .",    "&hellip;", 4 },
    { '(',  "(c)",      "&copy;",   2 },
    { '(',  "(r)",      "&reg;",    2 },
    { '(',  "(tm)",     "&trade;",  3 },
    { '3',  "<3/4>",    "&frac34;", 2 },
    { '3',  "<3/4ths>", "&frac34;", 2 },
    { '1',  "<1/2>",    "&frac12;", 2 },
    { '1',  "<1/4>",    "&frac14;", 2 },
    { '1',  "<1/4th>",  "&frac14;", 2 },
    { '&',  "&#0;",     0,          3 },
};
#endif
|
||
|
|
||
|
void sdhtml_smartypants( struct buf* ob, const uint8_t* text, size_t size )
|
||
|
{
|
||
|
size_t i;
|
||
|
struct smartypants_data smrt = { 0, 0 };
|
||
|
|
||
|
if( !text )
|
||
|
return;
|
||
|
|
||
|
bufgrow( ob, size );
|
||
|
|
||
|
for( i = 0; i < size; ++i )
|
||
|
{
|
||
|
size_t org;
|
||
|
uint8_t action = 0;
|
||
|
|
||
|
org = i;
|
||
|
|
||
|
while( i < size && (action = smartypants_cb_chars[text[i]]) == 0 )
|
||
|
i++;
|
||
|
|
||
|
if( i > org )
|
||
|
bufput( ob, text + org, i - org );
|
||
|
|
||
|
if( i < size )
|
||
|
{
|
||
|
i += smartypants_cb_ptrs[(int) action]
|
||
|
( ob, &smrt, i ? text[i - 1] : 0, text + i, size - i );
|
||
|
}
|
||
|
}
|
||
|
}
|