23 : m_tagHandler( ph ), m_current( 0 ), m_root( 0 ), m_xmlnss( 0 ), m_state( Initial ),
24 m_preamble( 0 ), m_return( ParseIncomplete ), m_quote( false ), m_haveTagPrefix( false ),
25 m_haveAttribPrefix( false ), m_attribIsXmlns( false ), m_deleteRoot( deleteRoot ),
// Decodes the XML entity or character reference that begins at data[pos].
//
// NOTE: this listing elides many lines of the function (braces, case labels,
// several statements), so the comments below describe only what is visible.
//
// @param pos  Index just before the reference name; data[pos + 1] selects the
//             kind of reference and data[pos + 2] is inspected as well.
// @param data Buffer that contains the reference.
// @return DecodeInsufficient when no semicolon follows pos (the rest of data
//         from pos on is stored in m_backBuffer first); DecodeInvalid on the
//         rejection paths shown below. Other return values are not visible in
//         this excerpt.
36 Parser::DecodeState Parser::decode( std::string::size_type& pos,
const std::string& data )
// Find the semicolon that terminates the reference.
38 std::string::size_type p = data.find(
';', pos );
// diff is computed before the npos test below; it is only meaningful once
// that test has shown that a semicolon was found.
39 std::string::size_type diff = p - pos;
// No terminator in this buffer: keep the unparsed tail for a later call.
41 if( p == std::string::npos )
43 m_backBuffer = data.substr( pos );
44 return DecodeInsufficient;
// Bounds on the distance between pos and the semicolon; the statement guarded
// by this test is elided (presumably a DecodeInvalid return - TODO confirm).
47 if( diff < 3 || diff > 9 )
// Dispatch on the first character of the reference name.
51 switch( data[pos + 1] )
// Numeric reference: an x or X in the second position is tested here; the
// assignments to base and idx are elided (presumably selecting hexadecimal).
58 if( data[pos + 2] ==
'x' || data[pos + 2] ==
'X' )
// Parse the number; end receives the address of the first unparsed character.
65 const long int val = std::strtol( data.data() + pos + idx, &end, base );
// The number must run right up to the semicolon and must not be negative.
66 if( *end !=
';' || val < 0 )
// Tab, line feed, carriage return and 0x20..0x7F; the action is elided.
69 if( val == 0x9 || val == 0xA || val == 0xD || ( val >= 0x20 && val <= 0x7F ) )
// 0x80..0x7FF: append a two-byte UTF-8 sequence to rep.
73 else if( val >= 0x80 && val <= 0x7FF )
75 rep += char( 192 + ( val >> 6 ) );
76 rep += char( 128 + ( val % 64 ) );
// 0x800..0xFFFD without the surrogate block 0xD800..0xDFFF: append a
// three-byte UTF-8 sequence to rep.
78 else if( ( val >= 0x800 && val <= 0xD7FF ) || ( val >= 0xE000 && val <= 0xFFFD ) )
80 rep += char( 224 + ( val >> 12 ) );
81 rep += char( 128 + ( ( val >> 6 ) % 64 ) );
82 rep += char( 128 + ( val % 64 ) );
// Append a four-byte UTF-8 sequence to rep.
// NOTE(review): this range starts at 0x100000 and excludes 0x10FFFF, while the
// previous branch ends at 0xFFFD. Values 0x10000..0xFFFFF and 0x10FFFF match
// none of the visible branches. Four-byte UTF-8 covers U+10000..U+10FFFF, so
// this looks like a typo for val >= 0x10000 && val <= 0x10FFFF - confirm.
84 else if( val >= 0x100000 && val < 0x10FFFF )
86 rep += char( 240 + ( val >> 18 ) );
87 rep += char( 128 + ( ( val >> 12 ) % 64 ) );
88 rep += char( 128 + ( ( val >> 6 ) % 64 ) );
89 rep += char( 128 + ( val % 64 ) );
// Two-letter names ending in t (presumably the lt and gt entities; the case
// labels and replacement characters are elided - TODO confirm).
96 if( diff == 3 && data[pos + 2] ==
't' )
102 if( diff == 3 && data[pos + 2] ==
't' )
105 return DecodeInvalid;
// Names compared together with their terminating semicolon: apos and amp.
108 if( diff == 5 && !data.compare( pos + 1, 5,
"apos;" ) )
110 else if( diff == 4 && !data.compare( pos + 1, 4,
"amp;" ) )
113 return DecodeInvalid;
// quot, compared the same way.
116 if( diff == 5 && !data.compare( pos + 1, 5,
"quot;" ) )
119 return DecodeInvalid;
122 return DecodeInvalid;
// Part of a later switch whose head is elided (presumably on m_state).
130 case TagAttributeValue:
// Tests whether needle occurs in data exactly at position pos.
//
// NOTE: this listing elides lines of the function; the value returned on a
// successful match is not visible here.
//
// @param pos    Position in data to compare at; on a match it is advanced by
//               needle.length() - 1.
// @param data   Buffer to look in.
// @param needle Text expected at pos.
// @return ForwardNotFound or ForwardInsufficientSize on the paths shown below.
140 Parser::ForwardScanState Parser::forwardScan( std::string::size_type& pos,
const std::string& data,
141 const std::string& needle )
// Only compare when data holds at least needle.length() characters from pos.
143 if( pos + needle.length() <= data.length() )
145 if( !data.compare( pos, needle.length(), needle ) )
// Advance by one less than the needle length (presumably because the caller
// increments the index itself - TODO confirm against the calling loop).
147 pos += needle.length() - 1;
152 return ForwardNotFound;
// Presumably the else branch of the length test above: store the tail of data
// from pos on in m_backBuffer and report that more input is needed.
157 m_backBuffer = data.substr( pos );
158 return ForwardInsufficientSize;
164 if( !m_backBuffer.empty() )
166 data.insert( 0, m_backBuffer );
170 std::string::size_type count = data.length();
171 for( std::string::size_type i = 0; i < count; ++i )
173 const unsigned char c = data[i];
179 return static_cast<int>( i );
186 if( isWhitespace( c ) )
192 m_state = TagOpening;
196 return static_cast<int>( i );
203 if( isWhitespace( c ) )
209 m_state = TagOpening;
223 if( isWhitespace( c ) )
232 return static_cast<int>( i );
235 m_state = TagClosingSlash;
238 m_state = TagNameCollect;
242 if( i + 1 >= data.length() )
245 switch( data[i + 1] )
248 switch( forwardScan( i, data,
"![CDATA[" ) )
251 m_state = TagCDATASection;
253 case ForwardNotFound:
257 case ForwardInsufficientSize:
262 switch( forwardScan( i, data,
"!-- " ) )
265 m_state = XMLComment;
267 case ForwardNotFound:
271 case ForwardInsufficientSize:
277 return static_cast<int>( i );
283 m_state = TagNameCollect;
287 case TagCDATASection:
291 switch( forwardScan( i, data,
"]]>" ) )
296 case ForwardNotFound:
299 case ForwardInsufficientSize:
311 switch( forwardScan( i, data,
" -->" ) )
316 case ForwardNotFound:
318 case ForwardInsufficientSize:
325 if( isWhitespace( c ) )
327 m_state = TagNameComplete;
338 return static_cast<int>( i );
341 m_state = TagOpeningSlash;
348 if( !m_haveTagPrefix )
350 m_haveTagPrefix =
true;
357 return static_cast<int>( i );
372 m_state = TagOpening;
376 switch( decode( i, data ) )
382 return static_cast<int>( i );
383 case DecodeInsufficient:
392 case TagOpeningSlash:
394 if( isWhitespace( c ) )
404 return static_cast<int>( i );
412 return static_cast<int>( i );
415 case TagClosingSlash:
417 if( isWhitespace( c ) )
426 return static_cast<int>( i );
430 m_state = TagClosing;
444 return static_cast<int>( i );
447 if( !m_haveTagPrefix )
449 m_haveTagPrefix =
true;
456 return static_cast<int>( i );
464 return static_cast<int>( i );
473 case TagNameComplete:
475 if( isWhitespace( c ) )
484 return static_cast<int>( i );
487 m_state = TagOpeningSlash;
490 if( m_preamble == 1 )
493 return static_cast<int>( i );
499 if( m_preamble == 1 )
504 return static_cast<int>( i );
509 m_state = TagAttribute;
515 if( isWhitespace( c ) )
517 m_state = TagAttributeComplete;
530 return static_cast<int>( i );
533 m_state = TagAttributeEqual;
536 if( !m_haveAttribPrefix && m_attrib !=
XMLNS )
538 m_haveAttribPrefix =
true;
539 m_attribPrefix = m_attrib;
542 else if( m_attrib ==
XMLNS )
544 m_attribIsXmlns =
true;
550 return static_cast<int>( i );
557 case TagAttributeComplete:
559 if( isWhitespace( c ) )
565 m_state = TagAttributeEqual;
569 return static_cast<int>( i );
573 case TagAttributeEqual:
575 if( isWhitespace( c ) )
583 m_state = TagAttributeValue;
587 return static_cast<int>( i );
591 case TagAttributeValue:
597 return static_cast<int>( i );
607 m_state = TagNameAlmostComplete;
612 switch( decode( i, data ) )
618 return static_cast<int>( i );
619 case DecodeInsufficient:
628 case TagNameAlmostComplete:
630 if( isWhitespace( c ) )
632 m_state = TagNameComplete;
639 m_state = TagOpeningSlash;
642 if( m_preamble == 1 )
645 return static_cast<int>( i );
651 if( m_preamble == 1 )
656 return static_cast<int>( i );
661 return static_cast<int>( i );
// Creates a Tag named m_tag and makes it either the root or a child of the
// current tag.
//
// NOTE: this listing elides lines of the function; the conditions that choose
// between the two allocations below, and the bodies of the later tests, are
// not visible.
675 void Parser::addTag()
// Root case: a Tag without a parent.
680 m_root =
new Tag( m_tag );
// Child case: the new Tag is constructed with the previous m_current as its
// first argument and then becomes m_current.
686 m_current =
new Tag( m_current, m_tag );
// Prefix handling is elided; the flag is cleared afterwards.
689 if( m_haveTagPrefix )
693 m_haveTagPrefix =
false;
// Pending attributes are handled when m_attribs is not empty (body elided).
696 if( m_attribs.size() )
718 streamEvent( m_root );
// Special case: the current tag is the root and its prefix is stream.
725 if( m_root && m_root == m_current && m_tagPrefix ==
"stream" )
// Special case: a tag named xml while m_preamble equals 2.
728 if( m_tag ==
"xml" && m_preamble == 2 )
// Builds a Tag::Attribute from m_attrib and m_value, appends it to m_attribs
// and clears the per-attribute prefix flags.
//
// NOTE: this listing elides lines of the function, so some branches are shown
// without their full bodies.
732 void Parser::addAttribute()
// NOTE(review): the statement below ends in a doubled semicolon; the second
// one is an empty statement and can be removed.
734 Tag::Attribute* attr =
new Tag::Attribute( m_attrib, m_value );;
// Namespace declaration: store the value in the *m_xmlnss map under the
// attribute name and give the attribute the XMLNS prefix.
735 if( m_attribIsXmlns )
740 (*m_xmlnss)[m_attrib] = m_value;
741 attr->setPrefix(
XMLNS );
// Otherwise use the collected attribute prefix when there is one.
746 if( !m_attribPrefix.empty() )
747 attr->setPrefix( m_attribPrefix );
// Attribute whose name equals XMLNS; the action is elided.
748 if( m_attrib ==
XMLNS )
751 m_attribs.push_back( attr );
// Reset the flags for the next attribute.
755 m_haveAttribPrefix =
false;
756 m_attribIsXmlns =
false;
// Handles collected character data. Only the guard is visible in this
// listing: the elided body runs when there is a current tag and m_cdata is
// not empty.
759 void Parser::addCData()
761 if( m_current && !m_cdata.empty() )
// Processes a closing tag whose name is in m_tag and whose prefix is in
// m_tagPrefix.
//
// NOTE: this listing elides lines of the function; the return statements and
// the bodies of both tests are not visible.
770 bool Parser::closeTag()
// Special case: closing tag with name stream and prefix stream.
774 if( m_tag ==
"stream" && m_tagPrefix ==
"stream" )
// Mismatch test: no current tag, a different name, or a non-empty prefix on
// the current tag that differs from m_tagPrefix.
777 if( !m_current || m_current->
name() != m_tag
778 || ( !m_current->
prefix().empty() && m_current->
prefix() != m_tagPrefix ) )
790 m_haveTagPrefix =
false;
// Move up one level in the tag tree.
793 m_current = m_current->
parent();
797 streamEvent( m_root );
819 m_haveAttribPrefix =
false;
820 m_haveTagPrefix =
false;
829 bool Parser::isValid(
unsigned char c )
831 return ( c != 0xc0 || c != 0xc1 || c < 0xf5 );
834 bool Parser::isWhitespace(
unsigned char c )
836 return ( c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20 );
839 void Parser::streamEvent( Tag* tag )
848 p.m_nullRoot =
false;
849 int i = p.
feed( data );
850 if( i == -1 && p.m_return == ParseOK )
851 return p.m_root->
clone();