//============================================================================// // A simple parser class to read text file and deal with strings // // // // Chao Peng // // 06/07/2016 // //============================================================================// #include "ConfigParser.h" #include <cstring> #include <climits> #include <algorithm> using namespace std; //============================================================================// // Constructors, Destructor, Assignment Operators // //============================================================================// // constructor, with format input ConfigParser::ConfigParser(Format f) : form(f), line_number(0) { // place holder } // copy constructor, only copy format ConfigParser::ConfigParser(const ConfigParser &that) : form(that.form), line_number(0) { // place holder } // move constructor, only move format ConfigParser::ConfigParser(ConfigParser &&that) : form(that.form), line_number(0) { // place holder } // desctructor ConfigParser::~ConfigParser() { CloseFile(); } // copy assignment operator ConfigParser &ConfigParser::operator = (const ConfigParser &rhs) { form = rhs.form; return *this; } // move assignment operator ConfigParser &ConfigParser::operator = (ConfigParser &&rhs) { form = rhs.form; return *this; } //============================================================================// // Public Member Function // //============================================================================// // open a file for future parsing bool ConfigParser::OpenFile(const string &path, size_t cap) { Clear(); infile.open(path); if(!infile.is_open()) return false; buf.data.resize(cap); // success! return true; } // read the whole file into a buffer and break it into lines bool ConfigParser::ReadFile(const string &path) { Clear(); infile.open(path); if(!infile.is_open()) return false; infile.seekg(0, ios::end); buf.end = infile.tellg(); buf.data.resize(buf.end); infile.seekg(0, ios::beg); infile.read(&buf.data[0], buf.end); infile.close(); return true; } // close file void ConfigParser::CloseFile() { return infile.close(); } // clear stored lines void ConfigParser::Clear() { buf.Reset(); // reset line line_number = 0; cur_line.Reset(); // close file CloseFile(); } // read a buffer in void ConfigParser::ReadBuffer(const char *buf_in) { Clear(); buf.end = strlen(buf_in); buf.data.resize(buf.end + 2); strncpy(&buf.data[0], buf_in, buf.end); } // parse a line from the file or buffer // if the line is empty (all white spaces or comments), it will be skipped // return false if reached the end bool ConfigParser::ParseLine() { elements.clear(); while(elements.empty()) { if(!getLine(cur_line)) return false; // count the line number ++line_number; parseBuffer(cur_line); } return true; } // parse the whole file or buffer // return false if nothing was found bool ConfigParser::ParseAll() { elements.clear(); while(true) { if(!getLine(cur_line)) return !elements.empty(); // count the line number ++line_number; parseBuffer(cur_line); } } // parse an input string, split the string into elements // the trail white spaces in the elements will be trimmed int ConfigParser::ParseString(const string &line) { deque<string> eles = split(line.c_str(), line.size(), form.split); int count = 0; for(auto &ele : eles) { string trim_ele = trim(ele, form.white); if(!trim_ele.empty()) { elements.emplace_back(move(trim_ele)); count++; } } return count; } // check if the current elemtns number is in the range [num, num + optional] // output a warning message if not bool ConfigParser::CheckElements(int num, int optional) { string num_str; if(optional > 0) { if((elements.size() >= (size_t)num) && (elements.size() <= (size_t)(num + optional))) { return true; } num_str = to_string(num) + " - " + to_string(num + optional); } else if(optional == 0) { if(elements.size() == (size_t)num) { return true; } num_str = to_string(num); } else { // optional < 0 if(elements.size() >= (size_t)num) { return true; } num_str = " >= " + to_string(num); } cout << "Config Parser Warning: Wrong format at line " << line_number << ", expecting " << num_str << " elements. " << endl << "\"" << cur_line.String() << "\"" << endl; return false; } // take the first element ConfigValue ConfigParser::TakeFirst() { if(elements.empty()) { cout << "Config Parser Warning: Trying to take elements while there is " << "nothing, 0 value returned." << endl; return ConfigValue("0"); } ConfigValue output(move(elements.front())); elements.pop_front(); return output; } //============================================================================// // Private Member Function // //============================================================================// // get buffer from the file or the input buffer // return false if reached input end bool ConfigParser::getBuffer() { if(buf.begin < buf.end) return true; if(!infile.is_open() || infile.bad() || infile.eof()) return false; infile.read(&buf.data[0], buf.data.size()); buf.begin = 0; buf.end = infile.gcount(); return true; } // trim white spaces inline void trimbuf(const vector<char> &buf, size_t &begin, size_t &end, const string &w) { while(begin < end) { if(w.find(buf[begin]) == string::npos) break; begin++; } while(end > begin) { if(w.find(buf[end - 1]) == string::npos) break; end--; } } inline bool compare(const char ch, const string &str, size_t &c1) { if(str.empty()) return false; c1 = (ch == str[c1]) ? (c1 + 1) : 0; return (c1 >= str.size()); } inline bool rcompare(const ConfigParser::CharBuffer &buf, const string &str) { if(str.empty() || buf.end <= buf.begin || buf.end - buf.begin < str.size()) return false; for(size_t i = 1; i <= str.size(); ++i) { if(str[str.size() - i] != buf[buf.end - i]) return false; } return true; } // a helper structure to check context status struct TextStatus { int val; size_t cmt1, cmt2, delim; TextStatus() : val(0), cmt1(0), cmt2(0), delim(0) {} inline void Set(int i) {val = i; cmt1 = 0; cmt2 = 0;} }; // get a line from the file or buffer // it deals with comments, white spaces // return false if reached the end bool ConfigParser::getLine(CharBuffer &line_buf, bool recursive) { if(!recursive) line_buf.Reset(); bool success = false; TextStatus stat; while(getBuffer()) { success = true; while(buf.begin < buf.end) { auto &ch = buf[buf.begin++]; switch(stat.val) { default: case 0: line_buf.Add(ch); // check if it is the end if(compare(ch, form.delim, stat.delim)) { line_buf.end -= form.delim.size(); trimbuf(line_buf.data, line_buf.begin, line_buf.end, form.white); // glue lines if(rcompare(line_buf, form.glue)) { line_buf.end -= form.glue.size(); return getLine(line_buf, true); } else { return success; } } else if(compare(ch, form.cmtopen, stat.cmt1)) { stat.Set(1); line_buf.end -= form.cmtopen.size(); } else if(compare(ch, form.cmtmark, stat.cmt2)) { stat.Set(2); line_buf.end -= form.cmtmark.size(); } break; case 1: if(compare(ch, form.cmtclose, stat.cmt1)) { stat.Set(0); } break; case 2: if(ch == '\n') { stat.Set(0); buf.begin -= 1; } break; } } } trimbuf(line_buf.data, line_buf.begin, line_buf.end, form.white); return success; } // parse an input char buffer, split the string into elements // the trail white spaces in the elements will be trimmed int ConfigParser::parseBuffer(const CharBuffer &line) { if(line.begin >= line.end) return 0; size_t ele_begin = line.begin; int count = 0; // intended to visit i == line.end, so the rest of the string get parsed for(size_t i = 0; i <= line.end; ++i) { if(i == line.end || form.split.find(line[i]) != string::npos) { size_t ele_end = i; trimbuf(line.data, ele_begin, ele_end, form.white); if(ele_begin < ele_end) { elements.emplace_back(&line[ele_begin], ele_end - ele_begin); count++; } ele_begin = i + 1; } } return count; } //============================================================================// // Public Static Function // //============================================================================// // comment out a string, remove chars from the comment mark to the line break void ConfigParser::comment_line(string &str, const string &c, const string &b) { // no need to continue if(str.empty() || c.empty() || b.empty()) return; // loop until no marks found while(true) { size_t c_begin = str.find(c); if(c_begin != string::npos) { size_t c_end = str.find(b, c_begin + c.size()); // found, comment out until the line break if(c_end != string::npos) { // do not remove line break str.erase(c_begin, c_end - c_begin); // not found, comment out until the end } else { str.erase(c_begin); // can stop now, since everything afterwards is removed return; } } else { // comment marks not found return; } } } // comment out between a pair of comment marks // NOTICE: does not support nested structure of comment marks void ConfigParser::comment_between(string &str, const string &open, const string &close) { // no need to continue if(str.empty() || open.empty() || close.empty()) return; while(true) { // find the openning comment mark size_t pos1 = str.find(open); if(pos1 != string::npos) { size_t pos2 = str.find(close, pos1 + open.size()); // found pair if(pos2 != string::npos) { // remove everything between, including this pair str.erase(pos1, pos2 + close.size() - pos1); // comment pair not found } else { return; } } else { // comment pair not found return; } } } // trim all the characters defined as white space at both ends string ConfigParser::trim(const string &str, const string &w) { const auto strBegin = str.find_first_not_of(w); if (strBegin == string::npos) return ""; // no content const auto strEnd = str.find_last_not_of(w); const auto strRange = strEnd - strBegin + 1; return str.substr(strBegin, strRange); } // split a string into several pieces by all the characters defined as splitter deque<string> ConfigParser::split(const string &str, const string &s) { deque<string> eles; char *cstr = new char[str.length() + 1]; strcpy(cstr, str.c_str()); char *pch = strtok(cstr, s.c_str()); while(pch != nullptr) { eles.emplace_back(pch); pch = strtok(nullptr, s.c_str()); } delete[] cstr; return eles; } // split a char array into several pieces deque<string> ConfigParser::split(const char* str, const size_t &len, const string &s) { deque<string> eles; char *str_cpy = new char[len + 1]; strncpy(str_cpy, str, len); // end of C string str_cpy[len] = '\0'; char *pch = strtok(str_cpy, s.c_str()); while(pch != nullptr) { eles.emplace_back(pch); pch = strtok(nullptr, s.c_str()); } delete[] str_cpy; return eles; } // split a string and convert all parts to float numbers vector<int> ConfigParser::stois(const string &str, const string &s, const string &w) { vector<int> res; for(auto &val : split(str, s)) { res.push_back(stoi(trim(val, w))); } return res; } // split a string and convert all parts to float numbers vector<float> ConfigParser::stofs(const string &str, const string &s, const string &w) { vector<float> res; for(auto &val : split(str, s)) { res.push_back(stof(trim(val, w))); } return res; } // split a string and convert all parts to double numbers vector<double> ConfigParser::stods(const string &str, const string &s, const string &w) { vector<double> res; for(auto &val : split(str, s)) { res.push_back(stod(trim(val, w))); } return res; } // get the split part at num string ConfigParser::get_split_part(int num, const char *str, const char &s) { // unavailable if(num < 0) return ""; int beg = 0, cur = 0; while(str[cur] != '\0') { if(str[cur] == s) { // number macthed if(num-- == 0) { return string(&str[beg], cur - beg); // update segment } else { beg = cur + 1; } } ++cur; } // last element if(num == 0) return string(&str[beg], cur - beg); return ""; } // check if the short string is the same with the first part of a long string bool cstr_cmp_helper(const char *cmp, const char *str, int size) { for(int i = 0; i < size; ++i) { if(cmp[i] != str[i] || cmp[i] == '\0' || str[i] == '\0') return false; } if(cmp[size] != '\0') return false; return true; } // split a long string and find if a short string is belong to its elements int ConfigParser::get_part_count(const char *cmp, const char *str, const char &s) { int cnt = 0, beg = 0, cur = 0; while(str[cur] != '\0') { if(str[cur] == s) { if(cstr_cmp_helper(cmp, &str[beg], cur - beg)) { return cnt; } ++cnt; beg = cur + 1; } ++cur; } if(cstr_cmp_helper(cmp, &str[beg], cur-beg)) return cnt; return -1; } // find the integer in a string int ConfigParser::find_integer(const string &str, const size_t &pos) { vector<int> integers = find_integers(str); if(pos >= integers.size()) { cerr << "Config Parser: Cannot find " << pos + 1 << " integers from " << "\"" << str << "\"." << endl; return 0; } return integers.at(pos); } // find all the integers in a string vector<int> ConfigParser::find_integers(const string &str) { vector<int> result; find_integer_helper(str, result); return result; } // helper function for finding a integer void ConfigParser::find_integer_helper(const string &str, vector<int> &result) { if(str.empty()) return; int negative = 1; auto numBeg = str.find_first_of("-0123456789"); if(numBeg == string::npos) return; // check negative sign string str2 = str.substr(numBeg); if(str2.at(0) == '-') { negative = -1; int num_check; do { str2.erase(0, 1); if(str2.empty()) return; num_check = str2.at(0) - '0'; } while (num_check > 9 || num_check < 0); } auto numEnd = str2.find_first_not_of("0123456789"); if(numEnd == string::npos) numEnd = str2.size(); int num = 0; size_t i = 0; for(; i < numEnd; ++i) { if( (num > INT_MAX/10) || (num == INT_MAX/10 && ((str2.at(i) - '0') > (INT_MAX - num*10))) ) { ++i; break; } num = num*10 + str2.at(i) - '0'; } result.push_back(negative*num); find_integer_helper(str2.substr(i), result); } // return the lower case of this string string ConfigParser::str_lower(const string &str) { string res = str; for(auto &c : res) { c = tolower(c); } return res; } // return the upper case of this string string ConfigParser::str_upper(const string &str) { string res = str; for(auto &c : res) { c = toupper(c); } return res; } // remove characters in ignore list string ConfigParser::str_remove(const string &str, const string &iignore) { string res = str; for(auto &c : iignore) { res.erase(remove(res.begin(), res.end(), c), res.end()); } return res; } // replace characters in the list with certain char string ConfigParser::str_replace(const string &str, const string &list, const char &rc) { if(list.empty()) return str; string res = str; for(auto &c : res) { if(list.find(c) != string::npos) c = rc; } return res; } // compare two strings, can be case insensitive bool ConfigParser::case_ins_equal(const string &str1, const string &str2) { if(str1.size() != str2.size()) { return false; } for(auto c1 = str1.begin(), c2 = str2.begin(); c1 != str1.end(); ++c1, ++c2) { if(tolower(*c1) != tolower(*c2)) { return false; } } return true; } // find the first pair position in a string // it will return the most outer pair if the first pair was in a nested structure pair<size_t, size_t> ConfigParser::find_pair(const string &str, const string &open, const string &close, size_t pos) { pair<size_t, size_t> res(string::npos, string::npos); if(open.empty() || close.empty() || str.size() <= pos) return res; res.first = str.find(open, pos); // pair not found if(res.first == string::npos) { return res; } int open_bracket = 1; size_t search_beg = res.first + open.size(); // loop for nested structure while(open_bracket > 0) { size_t next_close = str.find(close, search_beg); // pair not found if(next_close == string::npos) { // change back to npos for the not-found indication res.first = string::npos; return res; } // check for nested structure size_t next_open = str.find(open, search_beg); // the comparison is based on the definition of string::npos // npos for not found is size_t = -1, which is the biggest size_t value // find another open before close if(next_open < next_close) { open_bracket++; search_beg = next_open + open.size(); // else cases // 1. close mark found before open mark // 2. close mark found, open mark not // 3. close mark is the same as open mark, so the position is the same } else { open_bracket--; search_beg = next_close + close.size(); res.second = next_close; } } return res; } // get file name and directory from a path ConfigParser::PathInfo ConfigParser::decompose_path(const string &path) { PathInfo res; if(path.empty()) return res; // find directory auto dir_pos = path.find_last_of("/"); if(dir_pos != string::npos) { res.dir = path.substr(0, dir_pos); res.name = path.substr(dir_pos + 1); } else { res.name = path; } // find extension auto ext_pos = res.name.find_last_of("."); if(ext_pos != string::npos) { res.ext = res.name.substr(ext_pos + 1); res.name = res.name.substr(0, ext_pos); } return res; } // form the path string ConfigParser::compose_path(const ConfigParser::PathInfo &path) { string res(path.dir); res.reserve(path.dir.size() + path.name.size() + path.ext.size() + 2); if(!res.empty() && res.back() != '/') res += '/'; res += path.name; if(!path.ext.empty()) res += "." + path.ext; return res; } // form a path from given directory and file name, automatically add / if it is missing string ConfigParser::form_path(const string &dir, const string &file) { string file_path; file_path.reserve(dir.size() + file.size() + 1); file_path = dir; if(file_path.size() && file_path.back() != '/') file_path += "/"; file_path += file; return file_path; } // read a file and return its content in a char string string ConfigParser::file_to_string(const string &path) { ifstream inf(path); if(!inf.is_open()) return ""; // read the whole file in string str; inf.seekg(0, ios::end); str.reserve(inf.tellg()); inf.seekg(0, ios::beg); str.assign((istreambuf_iterator<char>(inf)), istreambuf_iterator<char>()); inf.close(); return str; } // break text file into several blocks in the format // <label> <open_mark> <content> <close_mark> // return extracted <residual> {<label> <content>} ConfigParser::TextBlocks ConfigParser::break_into_blocks(const string &buf, const string &open, const string &close, const string &seps) { TextBlocks result; if(buf.empty() || open.empty() || close.empty()) return result; size_t last_end = 0; // loop until no blocks found while(true) { // find the contents in block brackets auto p = find_pair(buf, open, close, last_end); // no pair found anymore if(p.first == string::npos || p.second == string::npos) break; // add content TextBlock block; block.content = trim(buf.substr(p.first + open.size(), p.second - p.first - open.size()), seps); // find label string head = buf.substr(last_end, p.first - last_end); if(head.empty()) { block.label = ""; } else { // find end of label auto end = head.find_last_not_of(seps); if(end == string::npos) end = head.size() - 1; // find begin of label auto beg = head.find_last_of(seps, end); if(beg == string::npos) beg = 0; // add label block.label = trim(head.substr(beg, end - beg + 1), seps); // other content goes to residual result.residual += head.substr(0, beg); } // combine blocks result.blocks.emplace_back(move(block)); last_end = p.second + close.size(); } // trim result.residual = trim(result.residual, seps); return result; }