00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <Parser.h>
00023 #include <cctype>
00024 #include <Exception.h>
00025
00026 using namespace xmlight;
00027 using namespace std;
00028
/// Tells whether @p c is accepted by this parser inside an element or
/// attribute name: an alphabetic character, a digit, or one of ':', '-',
/// '_' and '.'.
///
/// @param c character to classify
/// @return true when @p c is one of the accepted name characters
bool iselementchar(const char& c)
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80 must
    // be converted before being classified.
    const unsigned char uc = static_cast<unsigned char>(c);
    return std::isalnum(uc) || c == ':' || c == '-' || c == '_' || c == '.';
}
00034
00036 bool notelementchar(const char& c)
00037 {
00038 return ! iselementchar(c);
00039 }
00040
00041 inline void Parser::parse_tag() {
00042
00043 string::const_iterator begin, mark, data_end;
00044 begin = data.begin();
00045 data_end = data.end();
00046
00047 if(*begin == '/') {
00048
00049 mark = find_if(begin + 1, data_end, notelementchar);
00050 if(mark != data_end) {
00051 _on_error(string("Invalid char ") + *mark + string("in end-element <") + data + string(">"));
00052 }else {
00053 string name(begin + 1, data_end);
00054 on_end_element(name);
00055 element_count--;
00056 }
00057 }else if(*begin == '!') {
00058
00059 return;
00060 }else if(*begin == '?') {
00061
00062 if(string(begin, begin + 14) == "?xml version=\"") {
00063 if(has_xmldecl) {
00064 _on_error("Document allready declared!");
00065 }else {
00066
00067 has_xmldecl = true;
00068 }
00069 }else {
00070
00071 }
00072 }else {
00073
00074 if(! has_root_element) has_root_element = true;
00075 bool without_end_element = false;
00076 if(data_end[-1] == '/') without_end_element = true;
00077 mark = find_if(begin, data_end, notelementchar);
00078 if(mark != data_end) {
00079
00080 string name = string(begin, mark);
00081 TAttributes attributes;
00082 begin = mark;
00083
00084 for(;;) {
00085 string attr_name, attr_value;
00086 mark = find_if(begin, data_end, iselementchar);
00087 if(mark == data_end) {
00088 break;
00089 }else if(*mark == '/' && (mark + 1) == data_end) {
00090 break;
00091 }else {
00092 begin = mark;
00093
00094 mark = find_if(begin, data_end, notelementchar);
00095 if(mark == data_end) {
00096 _on_error(string("Unexpected end in tag <") + data + string(">"));
00097 return;
00098 }else if(*mark == '=' && (mark[1] == '"' || mark[1] == '\'')) {
00099 ++mark;
00100 attr_name = string(begin, mark - 1);
00101
00102
00103
00104 begin = mark + 1;
00105
00106 mark = find(begin, data_end, *mark);
00107 if(mark == data_end) {
00108 _on_error(string("Unexpected end in tag <") + data + string(">"));
00109 return;
00110 }else {
00111 attr_value = string(begin, mark);
00112 begin = mark;
00113 }
00114
00115 }else {
00116 _on_error(string("Invalid char ") + *mark + string(" in tag <") + data + string(">"));
00117 return;
00118 }
00119
00120 }
00121 attributes.insert(make_pair(attr_name, attr_value));
00122 }
00123
00124 on_start_element(name, attributes);
00125 if(without_end_element) {
00126 on_end_element(name);
00127 }else {
00128 ++element_count;
00129 }
00130
00131 }else {
00132
00133 on_start_element(data, TAttributes());
00134 ++element_count;
00135 }
00136 }
00137 }
00138
00139 void Parser::parse_string(const string& xmldata)
00140 {
00141 string::const_iterator position;
00142
00143 for(position = xmldata.begin(); position != xmldata.end(); ++position) {
00144 if(parsing_tag) {
00145 if(*position == '>') {
00146
00147 parsing_tag = false;
00148 parse_tag();
00149 data.clear();
00150 }else if(*position == '[' && data == "![CDATA") {
00151
00152
00153 parsing_tag = false;
00154 parsing_cdata = true;
00155 }else if(*position == '-' && data == "!-") {
00156
00157 parsing_tag = false;
00158 parsing_comment = true;
00159 data.clear();
00160 }else if(*position == '[' && string(data.begin(), data.begin() + 9) == "!DOCTYPE ") {
00161
00162
00163 parsing_tag = false;
00164 parsing_dtd = true;
00165 data.clear();
00166 }else {
00167 data.push_back(*position);
00168 }
00169 }else if(parsing_comment) {
00170 if(*position == '>' && data.size() >= 3 && string(data.end() - 2, data.end()) == "--") {
00171
00172 parsing_comment = false;
00173 string comment = string(data.begin(), data.end() - 2);
00174 on_comment(comment);
00175 data.clear();
00176 }else{
00177 data.push_back(*position);
00178 }
00179 }else if(parsing_cdata) {
00180 if(*position == '>' && data.size() >= 2 && string(data.end() - 2, data.end()) == "]]") {
00181
00182 parsing_cdata = false;
00183 data.clear();
00184 }else{
00185 data.push_back(*position);
00186 }
00187 }else if(parsing_dtd) {
00188 if(*position == '<') {
00189 ++dtd_count;
00190 }else if(*position == '>') {
00191 if(dtd_count > 0) {
00192 dtd_count--;
00193 }else {
00194 parsing_dtd = false;
00195 }
00196 }
00197 }else if(*position == '<') {
00198
00199 if(has_root_element) {
00200 on_data(data);
00201 data.clear();
00202 }
00203 parsing_tag = true;
00204 }else if(has_root_element) {
00205
00206 data.push_back(*position);
00207
00208 }
00209
00210 }
00211
00212 }