00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 #include <ctype.h>
00060
00061 #include <iomanip>
00062 #include <string>
00063 #include <sstream>
00064
00065 #include "GNURegex.h"
00066 #include "Error.h"
00067 #include "InternalErr.h"
00068
00069 #include "debug.h"
00070
00071 using namespace std;
00072
00073 namespace libdap {
00074
00075
00076
00077
00078
/** Format one byte as exactly two lowercase hexadecimal digits.

    The value is widened to unsigned int before insertion so the stream
    prints a number rather than a character; the field is zero-padded to
    a width of two.

    @param val The byte to format.
    @return A two-character string such as "0a" or "ff". */
std::string
hexstring(unsigned char val)
{
    std::ostringstream digits;
    digits.setf(std::ios::hex, std::ios::basefield);
    digits.fill('0');
    digits.width(2);
    digits << static_cast<unsigned int>(val);

    return digits.str();
}
00087
/** Convert a string of hexadecimal digits to the single character it encodes.

    The digits are parsed as a hexadecimal integer and narrowed to char.

    @param s Hexadecimal digits, e.g. "41".
    @return A one-character string holding the decoded character. An empty
    string is returned when \e s cannot be parsed as hexadecimal or when
    the decoded character is NUL (the latter matches the long-standing
    behavior of building the result from a NUL-terminated buffer). */
std::string
unhexstring(std::string s)
{
    // Initialized so a failed extraction can never leave an indeterminate
    // value behind (streams only guarantee a write on failure since C++11).
    int val = 0;
    std::istringstream ss(s);
    if (!(ss >> std::hex >> val))
        return std::string();

    const char decoded = static_cast<char>(val);
    if (decoded == '\0')
        return std::string();

    return std::string(1, decoded);
}
00099
/** Format one byte as exactly three octal digits.

    The value is widened to unsigned int before insertion so the stream
    prints a number rather than a character; the field is zero-padded to
    a width of three.

    @param val The byte to format.
    @return A three-character string such as "012" or "377". */
std::string
octstring(unsigned char val)
{
    std::ostringstream digits;
    digits.setf(std::ios::oct, std::ios::basefield);
    digits.fill('0');
    digits.width(3);
    digits << static_cast<unsigned int>(val);

    return digits.str();
}
00109
00110 string
00111 unoctstring(string s)
00112 {
00113 int val;
00114
00115 istringstream ss(s);
00116 ss >> oct >> val;
00117
00118 DBG(cerr << "unoctstring: " << val << endl);
00119
00120 char tmp_str[2];
00121 tmp_str[0] = static_cast<char>(val);
00122 tmp_str[1] = '\0';
00123 return string(tmp_str);
00124 }
00125
00150 string
00151 id2www(string in, const string &allowable)
00152 {
00153 string::size_type i = 0;
00154 DBG(cerr<<"Input string: [" << in << "]" << endl);
00155 while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
00156 DBG(cerr<<"Found escapee: [" << in[i] << "]");
00157 in.replace(i, 1, "%" + hexstring(in[i]));
00158 DBGN(cerr<<" now the string is: " << in << endl);
00159 i += 3;
00160 }
00161
00162 return in;
00163 }
00164
00175 string
00176 id2www_ce(string in, const string &allowable)
00177 {
00178 return id2www(in, allowable);
00179 }
00180
00213 string
00214 www2id(const string &in, const string &escape, const string &except)
00215 {
00216 string::size_type i = 0;
00217 string res = in;
00218 while ((i = res.find_first_of(escape, i)) != string::npos) {
00219 if (except.find(res.substr(i, 3)) != string::npos) {
00220 i += 3;
00221 continue;
00222 }
00223 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00224 ++i;
00225 }
00226
00227 return res;
00228 }
00229
00230 static string
00231 entity(char c)
00232 {
00233 switch (c) {
00234 case '>': return ">";
00235 case '<': return "<";
00236 case '&': return "&";
00237 case '\'': return "'";
00238 case '\"': return """;
00239 default:
00240 throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
00241 }
00242 }
00243
00244
00245
/** Re-express a number written in octal digits as hexadecimal digits.

    @param octal_digits The number in octal notation, e.g. "101".
    @return The same value in lowercase hexadecimal, zero-padded to at
    least two digits (e.g. "41"). Input that cannot be parsed as an octal
    number yields "00". */
std::string
octal_to_hex(const std::string &octal_digits)
{
    // Start from a defined value and reset it on failure so the result
    // never depends on an indeterminate or implementation-chosen number.
    int val = 0;

    std::istringstream ss(octal_digits);
    if (!(ss >> std::oct >> val))
        val = 0;

    std::ostringstream ds;
    ds << std::hex << std::setw(2) << std::setfill('0') << val;
    return ds.str();
}
00258
00265 string
00266 id2xml(string in, const string ¬_allowed)
00267 {
00268 string::size_type i = 0;
00269
00270 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
00271 in.replace(i, 1, entity(in[i]));
00272 ++i;
00273 }
00274 #if 0
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286 string octal_escape = "\\\\";
00287 i = 0;
00288 string::size_type length = in.length();
00289 while ((i = in.find(octal_escape, i)) != string::npos) {
00290
00291 string::size_type j = i + 2;
00292 if (j + 1 >= length)
00293 break;
00294 string octal_digits = in.substr(j, 3);
00295
00296 string hex_escape = string("&#x");
00297 hex_escape.append(octal_to_hex(octal_digits));
00298 hex_escape.append(string(";"));
00299
00300
00301 in.replace(i, 5, hex_escape);
00302
00303
00304 i += 6;
00305 }
00306 #endif
00307 return in;
00308 }
00309
/** Replace the five predefined XML entity references in \e in with the
    characters they stand for.

    Recognized references are "&gt;", "&lt;", "&amp;", "&apos;" and
    "&quot;". The string is decoded in a single left-to-right pass and the
    character produced by a replacement is never examined again, so text
    is decoded exactly once: "&amp;quot;" becomes "&quot;", not a double
    quote. Any other use of '&' is left unchanged.

    @param in The string to decode (taken by value and edited in place).
    @return The decoded string. */
std::string
xml2id(std::string in)
{
    struct Entity {
        const char *text;                   // the entity reference
        std::string::size_type length;      // number of characters in text
        char replacement;                   // the character it stands for
    };
    static const Entity entities[] = {
        { "&gt;",   4, '>'  },
        { "&lt;",   4, '<'  },
        { "&amp;",  5, '&'  },
        { "&apos;", 6, '\'' },
        { "&quot;", 6, '"'  }
    };
    static const unsigned int entity_count
        = sizeof(entities) / sizeof(entities[0]);

    std::string::size_type i = 0;
    while ((i = in.find('&', i)) != std::string::npos) {
        for (unsigned int e = 0; e < entity_count; ++e) {
            if (in.compare(i, entities[e].length, entities[e].text) == 0) {
                in.replace(i, entities[e].length, 1, entities[e].replacement);
                break;
            }
        }
        // Step past either the decoded character or an unrecognized '&'.
        ++i;
    }

    return in;
}
00341
/** Replace every "%xx" escape in \e s with a single underscore.

    Each '%' and up to two characters following it are replaced by '_'.
    A '%' closer than two characters to the end of the string takes only
    the characters that remain with it.

    @param s The string to rewrite (taken by value and edited in place).
    @return The rewritten string. */
std::string
esc2underscore(std::string s)
{
    // Resume just after each underscore instead of rescanning from the
    // start; the text before it can no longer contain a '%'.
    std::string::size_type pos = 0;
    while ((pos = s.find('%', pos)) != std::string::npos) {
        s.replace(pos, 3, "_");
        ++pos;
    }

    return s;
}
00356
00357
00361 string
00362 escattr(string s)
00363 {
00364 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
00365 const string ESC = "\\";
00366 const string DOUBLE_ESC = ESC + ESC;
00367 const string QUOTE = "\"";
00368 const string ESCQUOTE = ESC + QUOTE;
00369
00370
00371 string::size_type ind = 0;
00372 while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
00373 s.replace(ind, 1, ESC + octstring(s[ind]));
00374
00375
00376 ind = 0;
00377 while ((ind = s.find(ESC, ind)) != s.npos) {
00378 s.replace(ind, 1, DOUBLE_ESC);
00379 ind += DOUBLE_ESC.length();
00380 }
00381
00382
00383 ind = 0;
00384 while ((ind = s.find(QUOTE, ind)) != s.npos) {
00385 s.replace(ind, 1, ESCQUOTE);
00386 ind += ESCQUOTE.length();
00387 }
00388
00389 return s;
00390 }
00391
00400 string
00401 unescattr(string s)
00402 {
00403 Regex octal("\\\\[0-3][0-7][0-7]");
00404 Regex esc_quote("\\\\\"");
00405 Regex esc_esc("\\\\\\\\");
00406 const string ESC = "\\";
00407 const string QUOTE = "\"";
00408 int matchlen;
00409 unsigned int index;
00410
00411 DBG(cerr << "0XX" << s << "XXX" << endl);
00412
00413 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00414 while (index < s.length()) {
00415 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
00416 s.replace(index, 2, ESC);
00417 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
00418 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00419 }
00420
00421
00422 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00423 while (index < s.length()) {
00424 s.replace(index, 2, QUOTE);
00425 DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
00426 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00427 }
00428
00429
00430 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00431 while (index < s.length()) {
00432 s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
00433 DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
00434 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00435 }
00436
00437 DBG(cerr << "4XX" << s << "XXX" << endl);
00438 return s;
00439 }
00440
/** Wrap a message in double quotes and escape the double quotes inside it.

    A leading and a trailing double quote are added unless already present.
    Every double quote strictly between those two that is not already
    preceded by a backslash then gets a backslash inserted in front of it.

    An empty message, or one consisting of a single double quote, yields
    an empty quoted string (two double-quote characters).

    @param msg The message text (taken by value and edited in place).
    @return The quoted, escaped message. */
std::string
munge_error_message(std::string msg)
{
    // Opening quote; the emptiness check avoids reading a character that
    // is not there.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');

    // Closing quote; a lone '"' is the opening quote only, so it still
    // needs a partner.
    if (msg.length() < 2 || msg[msg.length() - 1] != '"')
        msg += "\"";

    // Escape interior quotes. Indices are used instead of iterators so the
    // bounds stay valid while the string grows.
    for (std::string::size_type i = 1; i + 1 < msg.length(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i;    // step onto the quote that was just escaped
        }
    }

    return msg;
}
00458
/** Put a backslash in front of every double quote in \e source.

    Quotes that already follow a backslash are escaped again; no other
    character is changed.

    @param source The string to escape (taken by value and edited in place).
    @return The escaped string. */
std::string
escape_double_quotes(std::string source)
{
    std::string::size_type quote_at = source.find('\"', 0);
    while (quote_at != std::string::npos) {
        source.replace(quote_at, 1, "\\\"");
        // Continue after the backslash-quote pair that was just written.
        quote_at = source.find('\"', quote_at + 2);
    }

    return source;
}
00474
/** Replace every backslash-quote pair in \e source with a plain double
    quote.

    The search continues after each quote that was produced, so a quote is
    unescaped at most once per call.

    @param source The string to unescape (taken by value and edited in
    place).
    @return The unescaped string. */
std::string
unescape_double_quotes(std::string source)
{
    std::string::size_type pair_at = source.find("\\\"", 0);
    while (pair_at != std::string::npos) {
        source.replace(pair_at, 2, "\"");
        pair_at = source.find("\\\"", pair_at + 1);
    }

    return source;
}
00491
00492 }
00493