/*
 * Copyright (c) 2011, Vicent Marti
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "buffer.h"
#include "autolink.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(_WIN32)
#define strncasecmp _strnicmp
#else
#include <strings.h> /* POSIX home of strncasecmp() */
#endif

/*
 * Check whether `link` starts with one of the whitelisted URI prefixes.
 *
 * link     - bytes of the candidate link (only the first `link_len` bytes
 *            are examined; no NUL terminator is required).
 * link_len - number of bytes available at `link`.
 *
 * Returns 1 when the link begins (case-insensitively) with one of the
 * prefixes in `valid_uris` AND the byte right after the prefix is
 * alphanumeric; returns 0 otherwise.
 */
int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
    static const char *const valid_uris[] = {
        "/", "http://", "https://", "ftp://", "mailto:"
    };
    /* derived from the table so the two can never get out of sync */
    static const size_t valid_uris_count =
        sizeof valid_uris / sizeof valid_uris[0];
    size_t i;

    for (i = 0; i < valid_uris_count; ++i) {
        size_t len = strlen(valid_uris[i]);

        /* strictly greater: link[len] is read below */
        if (link_len > len &&
            strncasecmp((const char *)link, valid_uris[i], len) == 0 &&
            isalnum(link[len]))
            return 1;
    }

    return 0;
}

/*
 * Trim a candidate link so that trailing punctuation, a trailing HTML
 * entity and an unbalanced closing bracket/quote are left out of it.
 *
 * data       - start of the candidate link.
 * link_end   - tentative length of the link, in bytes.
 * max_rewind - currently unused.
 * size       - currently unused.
 *
 * Returns the adjusted link length; 0 means nothing usable is left.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
    uint8_t cclose, copen = 0;
    size_t i;

    (void)max_rewind;
    (void)size;

    /* a '<' always terminates the link */
    for (i = 0; i < link_end; ++i) {
        if (data[i] == '<') {
            link_end = i;
            break;
        }
    }

    while (link_end > 0) {
        uint8_t last = data[link_end - 1];

        /* NOTE: strchr() also matches the string terminator, so a
         * trailing NUL byte is trimmed here as well. */
        if (strchr("?!.,", last) != NULL) {
            link_end--;
        } else if (last == ';') {
            size_t new_end;

            /* A lone ';' cannot be the end of an entity. Bail out before
             * computing `link_end - 2`, which would wrap around and read
             * out of bounds. */
            if (link_end < 2) {
                link_end = 0;
                break;
            }

            /* walk back over the letters of a possible "&name;" entity */
            new_end = link_end - 2;
            while (new_end > 0 && isalpha(data[new_end]))
                new_end--;

            if (new_end < link_end - 2 && data[new_end] == '&')
                link_end = new_end; /* drop the whole entity */
            else
                link_end--;         /* drop only the ';' */
        } else {
            break;
        }
    }

    if (link_end == 0)
        return 0;

    cclose = data[link_end - 1];

    switch (cclose) {
    case '"':  copen = '"';  break;
    case '\'': copen = '\''; break;
    case ')':  copen = '(';  break;
    case ']':  copen = '[';  break;
    case '}':  copen = '{';  break;
    default:   break;
    }

    if (copen != 0) {
        size_t closing = 0;
        size_t opening = 0;

        /* Try to close the final punctuation sign in this same line;
         * if we managed to close it outside of the URL, that means that it's
         * not part of the URL. If it closes inside the URL, that means it
         * is part of the URL.
         *
         * Examples:
         *
         *  foo http://www.pokemon.com/Pikachu_(Electric) bar
         *      => http://www.pokemon.com/Pikachu_(Electric)
         *
         *  foo (http://www.pokemon.com/Pikachu_(Electric)) bar
         *      => http://www.pokemon.com/Pikachu_(Electric)
         *
         *  foo http://www.pokemon.com/Pikachu_(Electric)) bar
         *      => http://www.pokemon.com/Pikachu_(Electric))
         *
         *  (foo http://www.pokemon.com/Pikachu_(Electric)) bar
         *      => foo http://www.pokemon.com/Pikachu_(Electric)
         */
        for (i = 0; i < link_end; ++i) {
            /* for quotes copen == cclose, so only `opening` is counted */
            if (data[i] == copen)
                opening++;
            else if (data[i] == cclose)
                closing++;
        }

        if (closing != opening)
            link_end--;
    }

    return link_end;
}

/*
 * Measure the run of domain-like characters at the start of `data`.
 *
 * data        - start of the candidate domain.
 * size        - number of bytes available at `data`.
 * allow_short - non-zero to accept a domain without any dot.
 *
 * Returns the length of the run of alphanumerics, '.' and '-' (the scan
 * never looks at the last available byte), or 0 when the first byte is
 * not alphanumeric, when `size` is 0, or when a dot is required but none
 * was seen.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
    size_t i, np = 0;

    /* size == 0 would both read data[0] out of bounds and make
     * `size - 1` wrap around in the loop below */
    if (size == 0 || !isalnum(data[0]))
        return 0;

    for (i = 1; i < size - 1; ++i) {
        if (data[i] == '.')
            np++;
        else if (!isalnum(data[i]) && data[i] != '-')
            break;
    }

    if (allow_short) {
        /* We don't need a valid domain in the strict sense (with at
         * least one dot); so just make sure it's composed of valid
         * domain characters and return the length of the valid
         * sequence. */
        return i;
    }

    /* a valid domain needs to have at least a dot.
     * that's as far as we get */
    return np ? i : 0;
}

/*
 * Try to recognise a "www." autolink starting at `data`.
 *
 * rewind_p   - out: set to 0 on success (nothing before `data` is part
 *              of the link).
 * link       - buffer handed to bufput() together with the link bytes.
 * data       - current position in the text; data[-1] is read when
 *              max_rewind > 0.
 * max_rewind - number of bytes readable before `data`.
 * size       - number of bytes available at `data`.
 * flags      - unused here.
 *
 * Returns the length of the link measured from `data`, or 0 when no link
 * was found.
 */
size_t
sd_autolink__www(size_t *rewind_p, struct buf *link, uint8_t *data,
                 size_t max_rewind, size_t size, unsigned int flags)
{
    size_t link_end;

    (void)flags;

    /* the match must be preceded by punctuation or whitespace */
    if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
        return 0;

    if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
        return 0;

    link_end = check_domain(data, size, 0);
    if (link_end == 0)
        return 0;

    /* the link extends up to the next whitespace byte */
    while (link_end < size && !isspace(data[link_end]))
        link_end++;

    link_end = autolink_delim(data, link_end, max_rewind, size);
    if (link_end == 0)
        return 0;

    bufput(link, data, link_end);
    *rewind_p = 0;

    /* returned as size_t; narrowing to int could truncate the length */
    return link_end;
}

/*
 * Try to recognise an e-mail autolink around `data`.
 *
 * rewind_p   - out: on success, number of bytes before `data` that belong
 *              to the link (the local part of the address).
 * link       - buffer handed to bufput() together with the link bytes.
 * data       - current position in the text; presumably the '@' sign
 *              (exactly one '@' must occur in the scanned range).
 * max_rewind - number of bytes readable before `data`.
 * size       - number of bytes available at `data`.
 * flags      - unused here.
 *
 * Returns the length of the link measured from `data`, or 0 when no link
 * was found.
 */
size_t
sd_autolink__email(size_t *rewind_p, struct buf *link, uint8_t *data,
                   size_t max_rewind, size_t size, unsigned int flags)
{
    size_t link_end, rewind;
    int nb = 0, np = 0;

    (void)flags;

    /* walk backwards over the characters allowed before the '@' */
    for (rewind = 0; rewind < max_rewind; ++rewind) {
        uint8_t c = *(data - rewind - 1);

        if (isalnum(c))
            continue;

        /* strchr() would also match the terminator of the set string,
         * so NUL bytes have to be rejected explicitly */
        if (c != '\0' && strchr(".+-_", c) != NULL)
            continue;

        break;
    }

    if (rewind == 0)
        return 0;

    for (link_end = 0; link_end < size; ++link_end) {
        uint8_t c = data[link_end];

        if (isalnum(c))
            continue;

        if (c == '@')
            nb++;
        else if (c == '.' && link_end < size - 1)
            np++; /* a dot in the very last byte does not count */
        else if (c != '-' && c != '_')
            break;
    }

    /* need exactly one '@', at least one dot, and a letter at the end */
    if (link_end < 2 || nb != 1 || np == 0 || !isalpha(data[link_end - 1]))
        return 0;

    link_end = autolink_delim(data, link_end, max_rewind, size);
    if (link_end == 0)
        return 0;

    bufput(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}

/*
 * Try to recognise a "scheme://..." autolink around `data`.
 *
 * rewind_p   - out: on success, number of bytes before `data` that belong
 *              to the link (the scheme letters).
 * link       - buffer handed to bufput() together with the link bytes.
 * data       - current position in the text; expected to be the ':' of
 *              "://" (only data[1] and data[2] are checked here, the
 *              full prefix is validated by sd_autolink_issafe()).
 * max_rewind - number of bytes readable before `data`.
 * size       - number of bytes available at `data`.
 * flags      - SD_AUTOLINK_SHORT_DOMAINS allows domains without a dot.
 *
 * Returns the length of the link measured from `data`, or 0 when no link
 * was found.
 */
size_t
sd_autolink__url(size_t *rewind_p, struct buf *link, uint8_t *data,
                 size_t max_rewind, size_t size, unsigned int flags)
{
    size_t link_end, rewind = 0, domain_len;

    if (size < 4 || data[1] != '/' || data[2] != '/')
        return 0;

    /* walk backwards over the letters of the scheme */
    while (rewind < max_rewind && isalpha(*(data - rewind - 1)))
        rewind++;

    if (!sd_autolink_issafe(data - rewind, size + rewind))
        return 0;

    link_end = strlen("://");

    domain_len = check_domain(data + link_end, size - link_end,
                              flags & SD_AUTOLINK_SHORT_DOMAINS);
    if (domain_len == 0)
        return 0;

    link_end += domain_len;

    /* the link extends up to the next whitespace byte */
    while (link_end < size && !isspace(data[link_end]))
        link_end++;

    link_end = autolink_delim(data, link_end, max_rewind, size);
    if (link_end == 0)
        return 0;

    bufput(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}