@ -61,6 +61,39 @@ XML_TAG::XML_TAG(int l, String f, String v)
}
/*
Field and line terminators must be interpreted as sequence of unsigned char .
Otherwise , non - ascii terminators will be negative on some platforms ,
and positive on others ( depending on the implementation of char ) .
*/
class Term_string
{
const uchar * m_ptr ;
uint m_length ;
int m_initial_byte ;
public :
Term_string ( const String & str ) :
m_ptr ( static_cast < const uchar * > ( static_cast < const void * > ( str . ptr ( ) ) ) ) ,
m_length ( str . length ( ) ) ,
m_initial_byte ( ( uchar ) ( str . length ( ) ? str . ptr ( ) [ 0 ] : INT_MAX ) )
{ }
void set ( const uchar * str , uint length , int initial_byte )
{
m_ptr = str ;
m_length = length ;
m_initial_byte = initial_byte ;
}
void reset ( ) { set ( NULL , 0 , INT_MAX ) ; }
const uchar * ptr ( ) const { return m_ptr ; }
uint length ( ) const { return m_length ; }
int initial_byte ( ) const { return m_initial_byte ; }
bool eq ( const Term_string & other ) const
{
return length ( ) = = other . length ( ) & & ! memcmp ( ptr ( ) , other . ptr ( ) , length ( ) ) ;
}
} ;
# define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
# define PUSH(A) *(stack_pos++)=(A)
@ -69,10 +102,10 @@ class READ_INFO {
String data ; /* Read buffer */
uint fixed_length ; /* Length of the fixed length record */
uint max_length ; /* Max length of row */
const uchar * field_term_ptr , * line_term_ptr ;
const char * line_start_ptr , * line_start_end ;
uint field_term_length , line_term_length , enclosed_length ;
int field_term_char , line_term_char , enclosed_char , escape_char ;
Term_string m_field_term ; /* FIELDS TERMINATED BY 'string' */
Term_string m_line_term ; /* LINES TERMINATED BY 'string' */
Term_string m_line_start ; /* LINES STARTING BY 'string' */
int enclosed_char , escape_char ;
int * stack , * stack_pos ;
bool found_end_of_line , start_of_line , eof ;
NET * io_net ;
@ -101,7 +134,11 @@ public:
int read_fixed_length ( void ) ;
int next_line ( void ) ;
char unescape ( char chr ) ;
int terminator ( const uchar * ptr , uint length ) ;
bool terminator ( const uchar * ptr , uint length ) ;
bool terminator ( const Term_string & str )
{ return terminator ( str . ptr ( ) , str . length ( ) ) ; }
bool terminator ( int chr , const Term_string & str )
{ return str . initial_byte ( ) = = chr & & terminator ( str ) ; }
bool find_start_of_fields ( ) ;
/* load xml */
List < XML_TAG > taglist ;
@ -1348,8 +1385,9 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
String & field_term , String & line_start , String & line_term ,
String & enclosed_par , int escape , bool get_it_from_net ,
bool is_fifo )
: file ( file_par ) , fixed_length ( tot_length ) , escape_char ( escape ) ,
found_end_of_line ( false ) , eof ( false ) ,
: file ( file_par ) , fixed_length ( tot_length ) ,
m_field_term ( field_term ) , m_line_term ( line_term ) , m_line_start ( line_start ) ,
escape_char ( escape ) , found_end_of_line ( false ) , eof ( false ) ,
error ( false ) , line_cuted ( false ) , found_null ( false ) , read_charset ( cs )
{
data . set_thread_specific ( ) ;
@ -1358,39 +1396,17 @@ READ_INFO::READ_INFO(THD *thd, File file_par, uint tot_length, CHARSET_INFO *cs,
Otherwise , non - ascii terminators will be negative on some platforms ,
and positive on others ( depending on the implementation of char ) .
*/
field_term_ptr =
static_cast < const uchar * > ( static_cast < const void * > ( field_term . ptr ( ) ) ) ;
field_term_length = field_term . length ( ) ;
line_term_ptr =
static_cast < const uchar * > ( static_cast < const void * > ( line_term . ptr ( ) ) ) ;
line_term_length = line_term . length ( ) ;
level = 0 ; /* for load xml */
if ( line_start . length ( ) = = 0 )
{
line_start_ptr = 0 ;
start_of_line = 0 ;
}
else
{
line_start_ptr = line_start . ptr ( ) ;
line_start_end = line_start_ptr + line_start . length ( ) ;
start_of_line = 1 ;
}
start_of_line = line_start . length ( ) ! = 0 ;
/* If field_terminator == line_terminator, don't use line_terminator */
if ( field_term_length = = line_term_length & &
! memcmp ( field_term_ptr , line_term_ptr , field_term_length ) )
{
line_term_length = 0 ;
line_term_ptr = NULL ;
}
enclosed_char = ( enclosed_length = enclosed_par . length ( ) ) ?
( uchar ) enclosed_par [ 0 ] : INT_MAX ;
field_term_char = field_term_length ? field_term_ptr [ 0 ] : INT_MAX ;
line_term_char = line_term_length ? line_term_ptr [ 0 ] : INT_MAX ;
if ( m_field_term . eq ( m_line_term ) )
m_line_term . reset ( ) ;
enclosed_char = enclosed_par . length ( ) ? ( uchar ) enclosed_par [ 0 ] : INT_MAX ;
/* Set of a stack for unget if long terminators */
uint length = MY_MAX ( cs - > mbmaxlen , MY_MAX ( field_term_length , line_term_length ) ) + 1 ;
uint length = MY_MAX ( cs - > mbmaxlen , MY_MAX ( m_field_term . length ( ) ,
m_line_term . length ( ) ) ) + 1 ;
set_if_bigger ( length , line_start . length ( ) ) ;
stack = stack_pos = ( int * ) thd - > alloc ( sizeof ( int ) * length ) ;
@ -1432,7 +1448,7 @@ READ_INFO::~READ_INFO()
}
inline int READ_INFO : : terminator ( const uchar * ptr , uint length )
inline bool READ_INFO : : terminator ( const uchar * ptr , uint length )
{
int chr = 0 ; // Keep gcc happy
uint i ;
@ -1444,11 +1460,11 @@ inline int READ_INFO::terminator(const uchar *ptr,uint length)
}
}
if ( i = = length )
return 1 ;
return true ;
PUSH ( chr ) ;
while ( i - - > 1 )
PUSH ( * - - ptr ) ;
return 0 ;
return false ;
}
@ -1516,12 +1532,12 @@ int READ_INFO::read_field()
chr = escape_char ;
}
# ifdef ALLOW_LINESEPARATOR_IN_STRINGS
if ( chr = = line_term_char )
if ( chr = = m_line_term . initial_byte ( ) )
# else
if ( chr = = line_term_char & & found_enclosed_char = = INT_MAX )
if ( chr = = m_line_term . initial_byte ( ) & & found_enclosed_char = = INT_MAX )
# endif
{
if ( terminator ( line_ter m_ptr , line_term_length ) )
if ( terminator ( m_line_term ) )
{ // Maybe unexpected linefeed
enclosed = 0 ;
found_end_of_line = 1 ;
@ -1538,9 +1554,7 @@ int READ_INFO::read_field()
continue ;
}
// End of enclosed field if followed by field_term or line_term
if ( chr = = my_b_EOF | |
( chr = = line_term_char & & terminator ( line_term_ptr ,
line_term_length ) ) )
if ( chr = = my_b_EOF | | terminator ( chr , m_line_term ) )
{
/* Maybe unexpected linefeed */
enclosed = 1 ;
@ -1549,8 +1563,7 @@ int READ_INFO::read_field()
row_end = ( uchar * ) data . end ( ) ;
return 0 ;
}
if ( chr = = field_term_char & &
terminator ( field_term_ptr , field_term_length ) )
if ( terminator ( chr , m_field_term ) )
{
enclosed = 1 ;
row_start = ( uchar * ) data . ptr ( ) + 1 ;
@ -1565,9 +1578,10 @@ int READ_INFO::read_field()
/* copy the found term character to 'to' */
chr = found_enclosed_char ;
}
else if ( chr = = field_term_char & & found_enclosed_char = = INT_MAX )
else if ( chr = = m_field_term . initial_byte ( ) & &
found_enclosed_char = = INT_MAX )
{
if ( terminator ( field_ter m_ptr , field_term_length ) )
if ( terminator ( m_field_term ) )
{
enclosed = 0 ;
row_start = ( uchar * ) data . ptr ( ) ;
@ -1665,13 +1679,10 @@ int READ_INFO::read_fixed_length()
data . append ( ( uchar ) unescape ( ( char ) chr ) ) ;
continue ;
}
if ( chr = = line_term_char )
{
if ( terminator ( line_term_ptr , line_term_length ) )
{ // Maybe unexpected linefeed
found_end_of_line = 1 ;
break ;
}
if ( terminator ( chr , m_line_term ) )
{ // Maybe unexpected linefeed
found_end_of_line = true ;
break ;
}
data . append ( chr ) ;
}
@ -1690,14 +1701,14 @@ found_eof:
int READ_INFO : : next_line ( )
{
line_cuted = 0 ;
start_of_line = line_start_ptr ! = 0 ;
start_of_line = m_line_start . length ( ) ! = 0 ;
if ( found_end_of_line | | eof )
{
found_end_of_line = 0 ;
return eof ;
}
found_end_of_line = 0 ;
if ( ! line_term_length )
if ( ! m_line_term . length ( ) )
return 0 ; // No lines
for ( ; ; )
{
@ -1725,10 +1736,11 @@ int READ_INFO::next_line()
or a broken byte sequence was found .
Check if the sequence is a prefix of the " LINES TERMINATED BY " string .
*/
if ( ( uchar ) buf [ 0 ] = = line_term_char & & i < = line_term_length & &
! memcmp ( buf , line_term_ptr , i ) )
if ( ( uchar ) buf [ 0 ] = = m_line_term . initial_byte ( ) & &
i < = m_line_term . length ( ) & &
! memcmp ( buf , m_line_term . ptr ( ) , i ) )
{
if ( line_term_length = = i )
if ( m_line_term . length ( ) = = i )
{
/*
We found a " LINES TERMINATED BY " string that consists
@ -1742,10 +1754,11 @@ int READ_INFO::next_line()
that still needs to be checked is ( line_term_length - i ) .
Note , READ_INFO : : terminator ( ) assumes that the leftmost byte of the
argument is already scanned from the file and is checked to
be a known prefix ( e . g . against line_term_char ) .
be a known prefix ( e . g . against line_term . initial _char( ) ) .
So we need to pass one extra byte .
*/
if ( terminator ( line_term_ptr + i - 1 , line_term_length - i + 1 ) )
if ( terminator ( m_line_term . ptr ( ) + i - 1 ,
m_line_term . length ( ) - i + 1 ) )
return 0 ;
}
/*
@ -1768,7 +1781,7 @@ int READ_INFO::next_line()
return 1 ;
continue ;
}
if ( buf [ 0 ] = = line_term_char & & terminator ( line_term_ptr , line_term_length ) )
if ( terminator ( buf [ 0 ] , m_line_term ) )
return 0 ;
line_cuted = true ;
}
@ -1777,30 +1790,12 @@ int READ_INFO::next_line()
bool READ_INFO : : find_start_of_fields ( )
{
int chr ;
try_again :
do
for ( int chr = GET ; chr ! = my_b_EOF ; chr = GET )
{
if ( ( chr = GET ) = = my_b_EOF )
{
found_end_of_line = eof = 1 ;
return 1 ;
}
} while ( ( char ) chr ! = line_start_ptr [ 0 ] ) ;
for ( const char * ptr = line_start_ptr + 1 ; ptr ! = line_start_end ; ptr + + )
{
chr = GET ; // Eof will be checked later
if ( ( char ) chr ! = * ptr )
{ // Can't be line_start
PUSH ( chr ) ;
while ( - - ptr ! = line_start_ptr )
{ // Restart with next char
PUSH ( * ptr ) ;
}
goto try_again ;
}
if ( terminator ( chr , m_line_start ) )
return false ;
}
return 0 ;
return ( found_end_of_line = eof = true ) ;
}
@ -1990,11 +1985,11 @@ int READ_INFO::read_xml(THD *thd)
}
// row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
if ( ( tag . length ( ) = = line_term_length - 2 ) & &
( memcmp ( tag . ptr ( ) , line_term_ptr + 1 , tag . length ( ) ) = = 0 ) )
if ( ( tag . length ( ) = = m_line_term . length ( ) - 2 ) & &
( memcmp ( tag . ptr ( ) , m_line_term . ptr ( ) + 1 , tag . length ( ) ) = = 0 ) )
{
DBUG_PRINT ( " read_xml " , ( " start-of-row: %i %s %s " ,
level , tag . c_ptr_safe ( ) , line_term_ptr ) ) ;
level , tag . c_ptr_safe ( ) , m_line_term . ptr ( ) ) ) ;
}
if ( chr = = ' ' | | chr = = ' > ' )
@ -2061,8 +2056,8 @@ int READ_INFO::read_xml(THD *thd)
chr = my_tospace ( GET ) ;
}
if ( ( tag . length ( ) = = line_term_length - 2 ) & &
( memcmp ( tag . ptr ( ) , line_term_ptr + 1 , tag . length ( ) ) = = 0 ) )
if ( ( tag . length ( ) = = m_line_term . length ( ) - 2 ) & &
( memcmp ( tag . ptr ( ) , m_line_term . ptr ( ) + 1 , tag . length ( ) ) = = 0 ) )
{
DBUG_PRINT ( " read_xml " , ( " found end-of-row %i %s " ,
level , tag . c_ptr_safe ( ) ) ) ;