311 lines
7.2 KiB
C
311 lines
7.2 KiB
C
/*
|
|
* Copyright (c) 2011, Vicent Marti
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*/
|
|
|
|
#include "buffer.h"
|
|
#include "autolink.h"
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
|
|
#if defined(_WIN32)
|
|
#define strncasecmp _strnicmp
|
|
#endif
|
|
|
|
/**
 * Report whether a link begins with one of the whitelisted prefixes.
 *
 * A link is accepted when it starts (compared case-insensitively) with
 * "/", "http://", "https://", "ftp://" or "mailto:" AND the byte that
 * immediately follows the prefix is alphanumeric.
 *
 * link      bytes of the candidate link; only the first link_len bytes
 *           are considered
 * link_len  number of bytes available at `link`
 *
 * Returns 1 when the link is accepted, 0 otherwise.
 */
int sd_autolink_issafe( const uint8_t* link, size_t link_len )
{
    static const char* const safe_prefixes[] =
    {
        "/", "http://", "https://", "ftp://", "mailto:"
    };
    const size_t prefix_count = sizeof( safe_prefixes ) / sizeof( safe_prefixes[0] );
    size_t idx = 0;

    while( idx < prefix_count )
    {
        const char* prefix = safe_prefixes[idx++];
        const size_t prefix_len = strlen( prefix );

        /* The link has to be strictly longer than the prefix, otherwise
         * there is no following byte to inspect. */
        if( link_len <= prefix_len )
            continue;

        if( strncasecmp( (const char*) link, prefix, prefix_len ) != 0 )
            continue;

        if( isalnum( link[prefix_len] ) )
            return 1;
    }

    return 0;
}
|
|
|
|
|
|
/**
 * Trim trailing delimiters from a candidate link and return its new length.
 *
 * Steps, in order:
 *   1. The link is cut at the first '<' found in data[0 .. link_end).
 *   2. Trailing '?', '!', '.', ',' are dropped repeatedly; a trailing
 *      "&name;" sequence (letters only between '&' and ';') is dropped as
 *      a whole, while any other trailing ';' is dropped on its own.
 *   3. If the last remaining byte is a closing quote or bracket and the
 *      number of openers and closers inside the link differ, that single
 *      last byte is dropped.
 *
 * data        first byte of the candidate link
 * link_end    number of bytes of `data` that currently form the link
 * max_rewind  unused
 * size        unused
 *
 * Returns the trimmed length, or 0 when nothing is left.
 */
static size_t autolink_delim( uint8_t* data, size_t link_end, size_t max_rewind, size_t size )
{
    uint8_t cclose, copen = 0;
    size_t i;

    /* Not needed by the trimming logic; referenced only so the
     * signature can stay as it is without unused-parameter warnings. */
    (void) max_rewind;
    (void) size;

    for( i = 0; i < link_end; ++i )
    {
        if( data[i] == '<' )
        {
            link_end = i;
            break;
        }
    }

    while( link_end > 0 )
    {
        const uint8_t last = data[link_end - 1];

        /* Note: strchr() also matches the set's terminating NUL, so a
         * trailing 0 byte is trimmed here as well. */
        if( strchr( "?!.,", last ) != NULL )
        {
            link_end--;
        }
        else if( last == ';' )
        {
            size_t new_end;

            /* A ';' at index 0 has nothing in front of it. Computing
             * link_end - 2 here would wrap around and index before
             * `data`, so just drop the ';'. */
            if( link_end < 2 )
            {
                link_end--;
                continue;
            }

            /* Walk back over the letters preceding the ';'. */
            new_end = link_end - 2;

            while( new_end > 0 && isalpha( data[new_end] ) )
                new_end--;

            /* At least one letter and a leading '&': drop the whole
             * "&name;" sequence; otherwise drop only the ';'. */
            if( new_end < link_end - 2 && data[new_end] == '&' )
                link_end = new_end;
            else
                link_end--;
        }
        else
        {
            break;
        }
    }

    if( link_end == 0 )
        return 0;

    cclose = data[link_end - 1];

    switch( cclose )
    {
        case '"':
            copen = '"';
            break;

        case '\'':
            copen = '\'';
            break;

        case ')':
            copen = '(';
            break;

        case ']':
            copen = '[';
            break;

        case '}':
            copen = '{';
            break;

        default:
            break;
    }

    if( copen != 0 )
    {
        size_t closing = 0;
        size_t opening = 0;

        /* Count openers and closers inside the link only. When they are
         * balanced the final closer is taken to belong to the link;
         * otherwise it is dropped (one byte at most).
         *
         *   www.pokemon.com/Pikachu_(Electric)
         *     => www.pokemon.com/Pikachu_(Electric)
         *
         *   www.pokemon.com/Pikachu_(Electric))
         *     => www.pokemon.com/Pikachu_(Electric)
         *
         * For quotes the opener and closer are the same byte, so every
         * occurrence is counted as an opener and the trailing quote is
         * always dropped.
         */
        for( i = 0; i < link_end; ++i )
        {
            if( data[i] == copen )
                opening++;
            else if( data[i] == cclose )
                closing++;
        }

        if( closing != opening )
            link_end--;
    }

    return link_end;
}
|
|
|
|
|
|
/**
 * Measure the leading run of domain-name characters in `data`.
 *
 * The first byte must be alphanumeric. Scanning then continues over
 * alphanumerics, '-' and '.', counting the dots. The loop stops one byte
 * short of `size`, so the final byte of the buffer is never examined.
 *
 * data         bytes to scan
 * size         number of bytes available at `data`
 * allow_short  non-zero to accept a run that contains no dot
 *
 * Returns the index at which scanning stopped, or 0 when the buffer is
 * empty, the first byte is not alphanumeric, or (with allow_short == 0)
 * no dot was seen.
 */
static size_t check_domain( uint8_t* data, size_t size, int allow_short )
{
    size_t i, np = 0;

    /* An empty buffer has no first byte to read, and `size - 1` below
     * would wrap around to SIZE_MAX. */
    if( size == 0 || !isalnum( data[0] ) )
        return 0;

    for( i = 1; i < size - 1; ++i )
    {
        if( data[i] == '.' )
            np++;
        else if( !isalnum( data[i] ) && data[i] != '-' )
            break;
    }

    if( allow_short )
    {
        /* We don't need a valid domain in the strict sense (with at
         * least one dot); just make sure it is composed of valid
         * domain characters and return the length of the valid
         * sequence. */
        return i;
    }

    /* A valid domain needs to have at least one dot. */
    return np ? i : 0;
}
|
|
|
|
|
|
/**
 * Try to recognise a link that starts with "www." at `data`.
 *
 * rewind_p    out: set to 0 on success (nothing before `data` is used)
 * link        buffer that receives the link bytes through bufput()
 * data        first byte of the candidate; data[-1] is read when
 *             max_rewind > 0
 * max_rewind  number of bytes available before `data`
 * size        number of bytes available at `data`
 * flags       unused
 *
 * Returns the number of bytes of `data` that form the link, or 0 when
 * no link is recognised: the preceding byte is neither punctuation nor
 * whitespace, the text does not start with "www.", check_domain()
 * rejects it, or autolink_delim() trims it down to nothing.
 */
size_t sd_autolink__www( size_t* rewind_p, struct buf* link, uint8_t* data,
                         size_t max_rewind, size_t size, unsigned int flags )
{
    size_t link_end;

    (void) flags;

    if( max_rewind > 0 && !ispunct( data[-1] ) && !isspace( data[-1] ) )
        return 0;

    if( size < 4 || memcmp( data, "www.", strlen( "www." ) ) != 0 )
        return 0;

    link_end = check_domain( data, size, 0 );

    if( link_end == 0 )
        return 0;

    /* Extend the link up to the next whitespace byte. */
    while( link_end < size && !isspace( data[link_end] ) )
        link_end++;

    link_end = autolink_delim( data, link_end, max_rewind, size );

    if( link_end == 0 )
        return 0;

    bufput( link, data, link_end );
    *rewind_p = 0;

    /* Returned as size_t, like the sibling functions; narrowing through
     * int would mangle lengths above INT_MAX. */
    return link_end;
}
|
|
|
|
|
|
/**
 * Try to recognise an e-mail address around `data`.
 *
 * The local part is collected by walking backwards from data[-1] over
 * alphanumerics and '.', '+', '-', '_'. The remainder is collected by
 * walking forwards from data[0] over alphanumerics, '@', '.', '-', '_'.
 * (Presumably `data` points at the '@'; the forward scan requires
 * exactly one '@' but does not check where it is. Confirm with caller.)
 *
 * rewind_p    out: number of bytes before `data` that belong to the link
 * link        buffer that receives the link bytes through bufput()
 * data        scan position; bytes data[-max_rewind .. -1] may be read
 * max_rewind  number of bytes available before `data`
 * size        number of bytes available at `data`
 * flags       unused
 *
 * Returns the number of bytes of `data` (forward part only) that form
 * the link, or 0 when no address is recognised.
 */
size_t sd_autolink__email( size_t* rewind_p, struct buf* link, uint8_t* data,
                           size_t max_rewind, size_t size, unsigned int flags )
{
    size_t link_end;
    size_t rewind;   /* size_t, so max_rewind is never narrowed to int */
    int nb = 0, np = 0;

    (void) flags;

    for( rewind = 0; rewind < max_rewind; ++rewind )
    {
        const uint8_t c = *( data - rewind - 1 );

        if( isalnum( c ) )
            continue;

        /* strchr() would also match the set's terminating NUL, so a
         * 0 byte has to be excluded explicitly. */
        if( c != '\0' && strchr( ".+-_", c ) != NULL )
            continue;

        break;
    }

    /* Nothing usable in front of `data`: no local part. */
    if( rewind == 0 )
        return 0;

    for( link_end = 0; link_end < size; ++link_end )
    {
        const uint8_t c = data[link_end];

        if( isalnum( c ) )
            continue;

        if( c == '@' )
            nb++;
        else if( c == '.' && link_end < size - 1 )
            np++;   /* a dot in the very last byte is not counted */
        else if( c != '-' && c != '_' )
            break;
    }

    /* Require exactly one '@', at least one counted dot, and a letter
     * as the final byte. */
    if( link_end < 2 || nb != 1 || np == 0
        || !isalpha( data[link_end - 1] ) )
        return 0;

    link_end = autolink_delim( data, link_end, max_rewind, size );

    if( link_end == 0 )
        return 0;

    bufput( link, data - rewind, link_end + rewind );
    *rewind_p = rewind;

    return link_end;
}
|
|
|
|
|
|
/**
 * Try to recognise a "scheme://host..." URL around `data`.
 *
 * data[1] and data[2] must both be '/'; data[0] itself is not checked
 * here (presumably the caller positions `data` on the ':'; confirm with
 * caller). The scheme is collected by walking backwards from data[-1]
 * over letters, and the whole candidate must pass sd_autolink_issafe().
 *
 * rewind_p    out: number of bytes before `data` that belong to the link
 * link        buffer that receives the link bytes through bufput()
 * data        scan position; bytes data[-max_rewind .. -1] may be read
 * max_rewind  number of bytes available before `data`
 * size        number of bytes available at `data`
 * flags       SD_AUTOLINK_SHORT_DOMAINS lets check_domain() accept a
 *             host without a dot
 *
 * Returns the number of bytes of `data` (forward part only) that form
 * the link, or 0 when no URL is recognised.
 */
size_t sd_autolink__url( size_t* rewind_p, struct buf* link, uint8_t* data,
                         size_t max_rewind, size_t size, unsigned int flags )
{
    size_t link_end, domain_len;
    size_t rewind = 0;   /* size_t, so max_rewind is never narrowed to int */

    if( size < 4 || data[1] != '/' || data[2] != '/' )
        return 0;

    /* Walk back over the letters of the scheme. */
    while( rewind < max_rewind && isalpha( *( data - rewind - 1 ) ) )
        rewind++;

    if( !sd_autolink_issafe( data - rewind, size + rewind ) )
        return 0;

    /* The host starts right after "://". */
    link_end = strlen( "://" );

    domain_len = check_domain(
        data + link_end,
        size - link_end,
        flags & SD_AUTOLINK_SHORT_DOMAINS );

    if( domain_len == 0 )
        return 0;

    link_end += domain_len;

    /* Extend the link up to the next whitespace byte. */
    while( link_end < size && !isspace( data[link_end] ) )
        link_end++;

    link_end = autolink_delim( data, link_end, max_rewind, size );

    if( link_end == 0 )
        return 0;

    bufput( link, data - rewind, link_end + rewind );
    *rewind_p = rewind;

    return link_end;
}
|