Hello everyone;
I'm trying to extract data from xml files.
At the moment I have two sets of XML files that are two and three levels deep, respectively.
1st:
-state
-date_range
2nd:
-date
-dollar_range
-gender
In the first instance, I would like to grab all the date_range (value and count) for every state:
E.g.
AK,86,00-06,26,07-12,26,13-24,34
AL,....
And in the second instance, get the gender for every dollar_range, in every date.
E.g.
200602,3122,000-005,305,M,57 F,220
200601,....
I'm hoping to extract and load all related value/count pairs into a vector of structures. For instance, in output example 2,
I would have a vector to hold a dollar_range (index) and all gender value/count for that particular range (index).
then, another vector with a date index and all the dollar_ranges for that particular date.
Right now I'm just experimenting, so any better design is highly appreciated... as well as any other help.
Please, excuse my spaghetti code.
Thanks in advance
daula
I'm trying to extract data from xml files.
At the moment I have two sets of XML files that are two and three levels deep, respectively.
1st:
-state
-date_range
2nd:
-date
-dollar_range
-gender
In the first instance, I would like to grab all the date_range (value and count) for every state:
E.g.
AK,86,00-06,26,07-12,26,13-24,34
AL,....
And in the second instance, get the gender for every dollar_range, in every date.
E.g.
200602,3122,000-005,305,M,57 F,220
200601,....
I'm hoping to extract and load all related value/count pairs into a vector of structures. For instance, in output example 2,
I would have a vector to hold a dollar_range (index) and all gender value/count for that particular range (index).
then, another vector with a date index and all the dollar_ranges for that particular date.
Right now I'm just experimenting, so any better design is highly appreciated... as well as any other help.
Please, excuse my spaghetti code.
Thanks in advance
daula
Code:
[c++]
#include <libxml/xmlreader.h>
#include <iostream>
#include <fstream>
#include <exception>
#include <vector>
#include <algorithm>
#include <functional>
#include <map>
#include "common.h"
using namespace std;
/*struct associate
{
string value;
unsigned int count;
associate(string v, unsigned int c) : value(v), count(c){};
associate() : value(""), count(0){};
};
*/
// One value/count pair as read from a <dps:value> / <dps:count> element pair.
// The global instance `data` is the scratch record that main() overwrites
// while scanning the file and copies into the level vectors.
// NOTE(review): because the file has `using namespace std;`, the unqualified
// name `data` can be ambiguous with std::data when built as C++17 or later;
// refer to it as `::data` or rename it.
struct fields
{
    std::string  index;   // not assigned anywhere in the code shown here
    std::string  sValue;  // text taken from a <dps:value> element
    unsigned int sCount;  // number taken from a <dps:count> element
} data;
struct content_1
{
std::vector <struct fields> vLevelTwo;
} two_d;
struct content_2
{
std::vector <struct fields> vLevelTwo;
std::vector <struct fields> vLevelThree;
} three_d;
void populate_three_d ( map <string, struct content_2> & mm );
void populate_two_d ( map <string, struct content_1> & mm );
const vector<string> get_data ( const string & filename );
// Returns the text between the last `="` and the character before the last
// '>' of `line`, e.g. `state` for `<dps:tally dps:name="state">`.
// Yields an empty string when the line has no '=' / '>' or is too short to
// hold a quoted value, instead of letting substr() throw std::out_of_range.
static string tally_name_in(const string& line)
{
    const string::size_type eq = line.rfind('=');
    const string::size_type gt = line.find_last_of('>');
    if (eq == string::npos || gt == string::npos)
        return string();
    const string::size_type first = eq + 2;      // skip the `="` pair
    if (gt <= first)                              // no room for name + closing quote
        return string();
    return line.substr(first, gt - 1 - first);    // gt - 1 is the closing quote
}

// Returns the text between the first '>' and the last "</" of `line`,
// e.g. `AK` for `<dps:value>AK</dps:value>`. When the closing tag is missing
// the remainder of the line after the first '>' is returned.
static string element_text_in(const string& line)
{
    const string::size_type open_end = line.find('>');
    if (open_end == string::npos)
        return string();
    const string::size_type first = open_end + 1;
    const string::size_type close_beg = line.rfind("</");
    if (close_beg == string::npos || close_beg < first)
        return line.substr(first);
    return line.substr(first, close_beg - first);
}

// Entry point: scans the XML file named by argv[1] line by line, collects
// value/count records into a map keyed by value text, and prints the map keys
// through populate_two_d() / populate_three_d().
//
// Returns 0 on success, -1 when no file name was given or the file cannot be
// opened, and -2 when a std::exception escapes the processing code.
int main (int argc, char **argv)
{
    int RetVal (0);
    try
    {
        // argv[1] is a null pointer when no argument is supplied; building a
        // std::string from it is undefined behaviour, so reject that up front.
        if (argc < 2)
        {
            cerr << "Usage: " << (argc > 0 ? argv[0] : "program") << " <xml-file>" << endl;
            return -1;
        }

        const string filename(argv[1]);
        // Input-only stream: a plain fstream opens for reading AND writing and
        // therefore fails on read-only files (get_data() already reads with ios::in).
        ifstream file(filename.c_str());
        if (!file.is_open())
        {
            RetVal = -1;
            const string error ("Failed to open... " + filename);
            cerr << error << endl;
        }
        else
        {
            const vector<string> mVec = get_data(filename);   // tally names, outermost first
            const int depth = static_cast<int>(mVec.size());

            const string attr_mark ("=");
            const string end_tag   ("</dps:tally_output>");
            const string value_tag ("<dps:value>");
            const string count_tag ("<dps:count>");

            map<string, struct content_1> cMap;
            map<string, struct content_2> cMap2;
            three_d.vLevelThree.clear();
            three_d.vLevelTwo.clear();
            two_d.vLevelTwo.clear();

            // Loop on the read itself so a failed getline() is never processed.
            // `pass` is the 1-based line number.
            string line;
            for (int pass = 1; getline(file, line); ++pass)
            {
                // The first six lines are never inspected for tally names.
                if (pass > 6)
                {
                    const bool has_attr = line.rfind(attr_mark) != string::npos;
                    const bool at_end   = !has_attr && trim(line) == end_tag;
                    if (has_attr || at_end)
                    {
                        const string name = has_attr ? tally_name_in(line) : string();

                        // `::data` is qualified on purpose: with `using namespace std;`
                        // the bare name is ambiguous with std::data in C++17.
                        //
                        // NOTE(review): records are pushed when an opening tally line is
                        // seen, so what gets stored is whichever value/count pair was
                        // parsed most recently, and two_d / three_d are never cleared
                        // inside this loop, so every map entry holds everything
                        // accumulated so far. Confirm whether that is intended.
                        if (depth == 2)
                        {
                            if (has_attr && name == mVec[1])
                                two_d.vLevelTwo.push_back(::data);
                            if ((has_attr && name == mVec[0]) || at_end)
                                cMap[::data.sValue] = two_d;
                        }
                        if (depth == 3)
                        {
                            if (has_attr && name == mVec[2])
                                three_d.vLevelThree.push_back(::data);
                            if (has_attr && name == mVec[1])
                            {
                                three_d.vLevelTwo.push_back(::data);
                                if (!three_d.vLevelThree.empty())
                                    three_d.vLevelThree.pop_back();
                            }
                            if ((has_attr && name == mVec[0]) || at_end)
                                cMap2[::data.sValue] = three_d;
                        }
                    }
                }

                // Value and count elements are only honoured after the seventh line.
                if (pass > 7 && line.find(value_tag) != string::npos)
                {
                    string cleaned(line);
                    cleaned = trim2(cleaned);
                    string text = element_text_in(cleaned);
                    ::data.sValue = trim(text);
                }
                if (pass > 7 && line.find(count_tag) != string::npos)
                {
                    string cleaned(line);
                    cleaned = trim2(cleaned);
                    const string text = element_text_in(cleaned);
                    ::data.sCount = static_cast<unsigned int>(atoi(text.c_str()));
                }
            }

            if (depth == 2) populate_two_d (cMap);
            if (depth == 3) populate_three_d (cMap2);
            // `file` is closed by its destructor.
        }
    }
    catch (const exception& e)
    {
        RetVal = -2;
        // Diagnostics go to cerr, like the open-failure message, so they do
        // not mix with the data written to stdout.
        cerr << "Error: " << e.what() << endl;
    }
    return RetVal;
}
// Writes every key of `mm` to stdout, one per line, in the map's iteration
// order. The per-entry value/count listing is not printed.
void populate_two_d ( map <string, struct content_1> & mm )
{
    map<string, struct content_1>::iterator entry = mm.begin();
    while (entry != mm.end())
    {
        cout << entry->first << endl;
        ++entry;
    }
}
// Writes every key of `mm` to stdout, one per line, in the map's iteration
// order. The second- and third-level records stored with each key are not
// printed; only the keys are emitted.
void populate_three_d ( map <string, struct content_2> & mm )
{
    map<string, struct content_2>::iterator entry = mm.begin();
    while (entry != mm.end())
    {
        cout << entry->first << endl;
        ++entry;
    }
}
// Extracts the text between the last `="` and the character before the last
// '>' of `line` into `name`. Returns false (leaving `name` untouched) when the
// line has no '=' / '>' or is too short to hold a quoted value, so the caller
// never hits std::out_of_range from substr().
static bool header_name_from(const string& line, string& name)
{
    const string::size_type eq = line.rfind('=');
    const string::size_type gt = line.find_last_of('>');
    if (eq == string::npos || gt == string::npos)
        return false;
    const string::size_type first = eq + 2;      // skip the `="` pair
    if (gt <= first)                              // no room for name + closing quote
        return false;
    name = line.substr(first, gt - 1 - first);    // gt - 1 is the closing quote
    return true;
}

// Reads the leading lines of `filename` and returns the names found on
// consecutive lines, starting at the sixth line (0-based index 5) and looking
// at no more than the first twelve lines. Collection stops at the first
// examined line that does not yield a name. Returns an empty vector when the
// file cannot be opened.
const vector<string> get_data ( const string & filename )
{
    const int first_examined = 5;   // lines 0..4 are skipped
    const int line_limit     = 12;  // lines from index 12 on are never read

    vector<string> names;
    ifstream file(filename.c_str());

    // Loop on the read itself so that a failed getline() is never treated as
    // a line of input.
    string buf;
    for (int i = 0; i < line_limit && getline(file, buf); ++i)
    {
        if (i < first_examined)
            continue;

        const string trimmed = trim(buf);
        string name;
        if (!header_name_from(trimmed, name))
            break;
        names.push_back(name);
    }
    return names;
}
[xml sample]
<?xml version="1.0" encoding="utf-8" ?>
<dps:services xmlns:dps="uri">
<dps:output>
<dps:tally dps:name="test_1">
<dps:tally dps:name="date">
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>4</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>57</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>5</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>220</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>C</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>18</dps:count>
</dps:tally>
<dps:value>000-005</dps:value>
<dps:count>305</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>18</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>82</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>14</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>330</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>C</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>33</dps:count>
</dps:tally>
<dps:value>005-010</dps:value>
<dps:count>478</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>20</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>109</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>25</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>490</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>C</dps:value>
<dps:count>6</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>B</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>56</dps:count>
</dps:tally>
<dps:value>010-015</dps:value>
<dps:count>707</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>8</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>34</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>3</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>160</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>C</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>13</dps:count>
</dps:tally>
<dps:value>015-020</dps:value>
<dps:count>219</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>30</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>131</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>17</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>683</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>C</dps:value>
<dps:count>6</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>74</dps:count>
</dps:tally>
<dps:value>020-025</dps:value>
<dps:count>941</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>12</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>84</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>5</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>251</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>C</dps:value>
<dps:count>4</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>B</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>20</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>7</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:value>025-050</dps:value>
<dps:count>378</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>2</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>18</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>1</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>59</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>A</dps:value>
<dps:count>9</dps:count>
</dps:tally>
<dps:value>050-075</dps:value>
<dps:count>89</dps:count>
</dps:tally>
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>F</dps:value>
<dps:count>5</dps:count>
</dps:tally>
<dps:value>075-100</dps:value>
<dps:count>5</dps:count>
</dps:tally>
<dps:value>200602</dps:value>
<dps:count>3122</dps:count>
</dps:tally>
<dps:tally dps:name="date">
<dps:tally dps:name="currdollar">
<dps:tally dps:name="Gender">
<dps:value>U</dps:value>
<dps:count>6</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>M</dps:value>
<dps:count>37</dps:count>
</dps:tally>
<dps:tally dps:name="Gender">
<dps:value>I</dps:value>
<dps:count>4</dps:count>
</dps:tally>
.
.
.
</dps:tally>
</dps:tally>
</dps:output>
</dps:services>