librcsb-core-wrapper 1.005
ISTable.h
Go to the documentation of this file.
1//$$FILE$$
2//$$VERSION$$
3//$$DATE$$
4//$$LICENSE$$
5
6
14#ifndef ISTABLE_H
15#define ISTABLE_H
16
17
18#include <float.h>
19
20#include <string>
21#include <vector>
22#include <map>
23
24#include <rcsb/mapped_vector.h>
25#include <rcsb/mapped_vector.C>
26#include <rcsb/GenString.h>
27#include <rcsb/ITTable.h>
28#include <rcsb/Serializer.h>
29
30
31typedef std::multimap<std::string, unsigned int, StringLess> tIndex;
32
33
54{
55 public:
57
60
62 {
63 eNONE = 0,
71 // Used only in block diff to indicate missing table in first block
73 // Used only in block diff to indicate extra table in first block
74 eEXTRA
75 };
76
78
85
86#ifdef VLAD_SECOND_ITTABLE
87 enum eSearchType
88 {
89 eEQUAL = 0,
94 };
95#endif
96
98
101
102#ifdef VLAD_SECOND_ITTABLE
103 enum eSearchDir
104 {
105 eFORWARD = 0,
107 };
108#endif
109
110 static const unsigned char DT_STRING_VAL = 1;
111 static const unsigned char DT_INTEGER_VAL = 2;
112 // static const unsigned char DT_DOUBLE_VAL = 3;
113
114 // Sets string comparison case sensitive
115 static const unsigned char CASE_SENSE = 0x00;
116 // Sets string comparison case insensitive
117 static const unsigned char CASE_INSENSE = 0x01;
118 // Sets string comparison to be sensitive to whitespace
119 static const unsigned char W_SPACE_SENSE = 0x00;
120 // Sets string comparison to ignore repeating whitespace.
121 // Also ignores leading and trailing whitespace
122 static const unsigned char W_SPACE_INSENSE = 0x02;
123 // string datatype
124 static const unsigned char DT_STRING = DT_STRING_VAL << 4;
125 // integer datatype
126 static const unsigned char DT_INTEGER = DT_INTEGER_VAL << 4;
127 // VLAD FEATURE NOT WORKING double is not working, maybe integer. check it // double datatype
128 // static const unsigned char DT_DOUBLE = DT_DOUBLE_VAL << 4;
129
148
170 colCaseSense = Char::eCASE_SENSITIVE);
171
189 ISTable(const std::string& name,
190 const Char::eCompareType colCaseSense = Char::eCASE_SENSITIVE);
191
212 ISTable(const std::string& name, eOrientation orient,
213 const Char::eCompareType colCaseSense = Char::eCASE_SENSITIVE);
214
231 ISTable(const ISTable& inTable);
232
247
263 ISTable& operator=(const ISTable& inTable);
264
287
301 inline const std::string& GetName() const;
302
316 void SetName(const std::string& name);
317
331 inline unsigned int GetNumColumns() const;
332
346 const std::vector<std::string>& GetColumnNames() const;
347
362 bool IsColumnPresent(const std::string& colName);
363
392 void AddColumn(const std::string& colName,
393 const std::vector<std::string>& col = std::vector<std::string>());
394
437 void InsertColumn(const std::string& colName,
438 const std::string& afColName, const std::vector<std::string>& col =
439 std::vector<std::string>());
440
469 void FillColumn(const std::string& colName,
470 const std::vector<std::string>& col);
471
490 void GetColumn(std::vector<std::string>& col, const std::string& colName);
491
523 void GetColumn(std::vector<std::string>& col, const std::string& colName,
524 const unsigned int fromRowIndex, unsigned int toRowIndex);
525
549 void GetColumn(std::vector<std::string>& col, const std::string& colName,
550 const std::vector<unsigned int>& rowIndex);
551
574 void RenameColumn(const std::string& oldColName,
575 const std::string& newColName);
576
593 void ClearColumn(const std::string& colName);
594
611 void DeleteColumn(const std::string& colName);
612
626 inline unsigned int GetNumRows() const;
627
664 unsigned int AddRow(const std::vector<std::string>& row =
665 std::vector<std::string>());
666
713 unsigned int InsertRow(const unsigned int atRowIndex,
714 const std::vector<std::string>& row = std::vector<std::string>());
715
740 void FillRow(const unsigned int rowIndex,
741 const std::vector<std::string>& row);
742
782 void GetRow(std::vector<std::string>& row, const unsigned int rowIndex,
783 const std::string& fromColName = std::string(),
784 const std::string& toColName = std::string());
785
802 const std::vector<std::string>& GetRow(const unsigned int rowIndex);
803
819 void ClearRow(const unsigned int rowIndex);
820
839 void DeleteRow(const unsigned int rowIndex);
840
858 void DeleteRows(const std::vector<unsigned int>& rows);
859
873 inline unsigned int GetLastRowIndex();
874
898 void UpdateCell(const unsigned int rowIndex, const std::string& colName,
899 const std::string& value);
900
922 const std::string& operator()(const unsigned int rowIndex,
923 const std::string& colName) const;
924
949 void SetFlags(const std::string& colName, const unsigned char flags);
950
968 unsigned char GetDataType(const std::string& colName);
969
998 unsigned int FindFirst(const std::vector<std::string>& targets,
999 const std::vector<std::string>& colNames,
1000 const std::string& indexName = std::string());
1001
1025 void Search(std::vector<unsigned int>& res, const std::string& target,
1026 const std::string& colName, const unsigned int fromRowIndex = 0,
1027 const eSearchDir searchDir = eFORWARD,
1028 const eSearchType searchType = eEQUAL);
1029
1059 void Search(std::vector<unsigned int>& res,
1060 const std::vector<std::string>& targets,
1061 const std::vector<std::string>& colNames,
1062 const unsigned int fromRowIndex = 0,
1063 const eSearchDir searchDir = eFORWARD,
1064 const eSearchType searchType = eEQUAL,
1065 const std::string& indexName = std::string());
1066
1098 void FindDuplicateRows(std::vector<std::pair<unsigned int,
1099 unsigned int> >& duplRows, const std::vector<std::string>& colNames,
1100 const bool keepDuplRows, const eSearchDir searchDir = eFORWARD);
1101
1116 inline Char::eCompareType GetColCaseSense() const;
1117
1121 inline void SetModified(const bool modified);
1122
1126 inline bool GetModified();
1127
1132
1136 int WriteObject(Serializer* ser, int& size);
1137
1141 int GetObject(UInt32 index, Serializer* ser);
1142
1146 void Read(unsigned int indexInFile);
1147
1151 int Write();
1152
1156 // typeOfMerge is 0 for overwrite, 1 for overlap
1157 static ISTable* Merge(ISTable& firstTable, ISTable& secondTable,
1158 unsigned int typeOfMerge = 0);
1159
1163 bool PrintDiff(ISTable& inTable);
1164
1168 inline bool IndexExists(const std::string& indexName);
1169
1173 void CreateIndex(const std::string& indexName,
1174 const std::vector<std::string>& colNames,
1175 const unsigned int unique = 0);
1176
1180 void UpdateIndex(const std::string& indexName, const unsigned int rowIndex);
1181
1185 void RebuildIndex(const std::string& indexName);
1186
1191
1195 void DeleteIndex(const std::string& indexName);
1196
1200 inline unsigned int GetNumIndices();
1201
1205 void CreateKey(const std::vector<std::string>& colNames);
1206
1211
1215 static void SetUnion(const std::vector<unsigned int>& a,
1216 const std::vector<unsigned int>& b, std::vector<unsigned int>& ret);
1217
1221 static void SetIntersect(const std::vector<unsigned int>& a,
1222 const std::vector<unsigned int>& b, std::vector<unsigned int>& ret);
1223
1227 void GetColumnsIndices(std::vector<unsigned int>& colIndices,
1228 const std::vector<std::string>& colNames);
1229
1233 void GetColumn(std::vector<std::string>& col, const std::string& colName,
1234 const std::string& indexName);
1235
1236 private:
1237 static const unsigned int MAX_NUM_ITTABLE_ROWS = 1000;
1238
1239 // number of digits in DBL_MIN_10_EXP; the letter 'e' is not included in the size
1240 static const unsigned int EXPONENT = 4;
1241 static const unsigned int MAX_PRECISION = DBL_DIG;
1242 //???DBL_MANT_DIG;
1243 static const unsigned int MANTISSA = MAX_PRECISION + 2;
1244 static const unsigned int INT_LIMIT = 11;
1245
1246 // datatype mask
1247 static const unsigned char DT_MASK = 15 << 4;
1248 // string comparison sensitivity mask
1249 static const unsigned char SC_MASK = 0x01;
1250 // white space sensitivity mask
1251 static const unsigned char WS_MASK = 0x02;
1252 static const unsigned char LAST_DT_VALUE = 3;
1253 static const unsigned int DEFAULT_PRECISION = MAX_PRECISION;
1254 static const unsigned char DEFAULT_OPTIONS;
1255
1256 static const std::string _version;
1257
1258 std::string _name;
1259
1260 std::vector<ITTable> _ittables;
1261
1262 ITTable::eOrientation _orient;
1263
1264 Char::eCompareType _colCaseSense;
1265
1267
1268 std::vector<unsigned int> _precision;
1269 std::vector<unsigned char> _compare_opts;
1270
1271 std::vector<std::string> _indexNames;
1272 std::vector<std::vector<unsigned int> > _listsOfColumns;
1273 std::vector<unsigned int> _unique;
1274
1275 Serializer* _ser;
1276
1277 bool _modified; // Indicates whether table has been modified
1278
1279 unsigned int _numRows;
1280
1281 mutable unsigned int _rowIndexCache;
1282 mutable std::pair<unsigned int, unsigned int> _rowLocCache;
1283
1284 void InsertColumn(const std::string& colName, const unsigned int atColIndex,
1285 const std::vector<std::string>& col = std::vector<std::string>());
1286 void CreateColumn(const std::string& colName, const unsigned int atColIndex,
1287 const std::vector<std::string>& col = std::vector<std::string>());
1288 int UpdateCell(const std::string& cell, const unsigned int colIndex,
1289 const unsigned int rowIndex);
1290 const std::string& operator()(const unsigned int rowIndex,
1291 const unsigned int colIndex) const;
1292 int SetFlags(const unsigned char newOpts, const unsigned int colIndex);
1293 void FindDuplicateRows(const std::vector<unsigned int>& colIndices,
1294 std::vector<std::pair<unsigned int, unsigned int> >& duplRows,
1295 const unsigned int keep, const eSearchDir searchDir = eFORWARD);
1296 void VerifyColumnsIndices(const std::vector<unsigned int>& colIndices);
1297 bool AreListsOfColumnsValid(const std::vector<unsigned int>& colIndices);
1298 void CreateIndex(const std::string& indexName,
1299 const std::vector<unsigned int>& colIndices,
1300 const unsigned int unique = 0);
1301 void CreateKey(const std::vector<unsigned int>& colIndices);
1302 unsigned int FindFirst(const std::vector<std::string>& targets,
1303 const std::vector<unsigned int>& colIndices,
1304 const std::string& indexName = std::string());
1305 void Search(std::vector<unsigned int>& res,
1306 const std::vector<std::string>& targets,
1307 const std::vector<unsigned int>& colIndices,
1308 const unsigned int fromRowIndex = 0,
1309 const eSearchDir searchDir = eFORWARD,
1310 const eSearchType searchType = eEQUAL,
1311 const std::string& indexName = std::string());
1312
1313 void Init();
1314 void Clear();
1315
1317 GetCompareType(const std::vector<unsigned int>& colIndices);
1318
1319 std::string CellValue(const unsigned int colIndex,
1320 const unsigned int rowIndex);
1321 std::string ConvertString(const std::string& value,
1322 const unsigned int colIndex);
1323 std::string MultiStringsValue(const std::vector<std::string>& values,
1324 const std::vector<unsigned int>& colIndices);
1325 std::string SubRowValue(const std::vector<unsigned int>& colIndices,
1326 const unsigned int rowIndex);
1327 std::string AggregateRow(const std::vector<unsigned int>& colIndices,
1328 const unsigned int rowIndex);
1329
1330 inline void AppendToAndDelimit(std::string& to,
1331 const std::string& appending);
1332
1333 void ValidateOptions(unsigned int colIndex);
1334
1335 std::string CreateInternalIndexName(const unsigned int indexIndex);
1336 void UpdateIndex(const unsigned int indexIndex,
1337 const unsigned int rowIndex);
1338 void RebuildIndex(const unsigned int indexIndex);
1339 void ClearIndex(const unsigned int indexIndex);
1340 void DeleteIndex(const unsigned int indexIndex);
1341
1342 int FindIndex(const std::string& indexName);
1343 int FindIndex(const std::vector<unsigned int>& colIndices);
1344
1345 void UpdateIndices(const unsigned int rowIndex);
1346 void ClearIndices();
1347
1348 bool IsColumnInIndex(const unsigned int indexIndex,
1349 const unsigned int colIndex);
1350
1351 int FindKeyIndex();
1352
1353 void UpdateColListOnColInsert(const unsigned int colIndex);
1354 void UpdateColListOnColDelete(const unsigned int colIndex);
1355 void UpdateColListOnCellUpdate(const unsigned int rowIndex,
1356 const unsigned int colIndex);
1357
1358 unsigned int FindFirst(const std::vector<std::string>& targets,
1359 const std::vector<unsigned int>& colIndices,
1360 const unsigned int indexIndex);
1361
1362 int WriteObjectV9(Serializer*, int& size);
1363
1364 int GetObjectV9(UInt32 index, Serializer*);
1365 int GetObjectV8(UInt32 index, Serializer*);
1366 int GetObjectV7(UInt32 index, Serializer*);
1367 int GetObjectV6(UInt32 index, Serializer*);
1368 int GetObjectV3(UInt32 index, Serializer*);
1369 int GetObjectV2(UInt32 index, Serializer*);
1370 int GetObjectV1(UInt32 index, Serializer*);
1371 int GetObjectV1_1(UInt32 index, Serializer*);
1372
1373 void ConvertToInt(const std::string& a, std::string& ret);
1374 void ConvertDouble(const std::string& a, std::string& ret);
1375 void ConvertToLowerNoWhiteSpace(const std::string& a, std::string& ret);
1376
1377 void GetRowLocation(std::pair<unsigned int, unsigned int>& rowLoc,
1378 const unsigned int rowIndex) const;
1379 void CacheRowLocation(const unsigned int rowIndex) const;
1380
1381 void CreateSubtables(const unsigned int numRows);
1382 void CreateSubtableColumns(const unsigned int colIndex,
1383 const std::vector<std::string>& col);
1384 void CreateColumn(const unsigned int atColIndex,
1385 const std::vector<std::string>& col);
1386
1387 void Print(const std::string& indexName);
1388
1389 unsigned int GetColumnIndex(const std::string& colName) const;
1390
1391};
1392
1393
1394std::ostream& operator<<(std::ostream& out, const ISTable& isTable);
1395
1396
1397inline unsigned int ISTable::GetLastRowIndex()
1398{
1399
1400 return(GetNumRows() - 1);
1401
1402}
1403
1404
1405inline unsigned int ISTable::GetNumIndices()
1406{
1407
1408 return(_indexNames.size());
1409
1410}
1411
1412
1413inline bool ISTable::IndexExists(const std::string& indexName)
1414{
1415
1416 int ret = FindIndex(indexName);
1417
1418 if (ret == -1)
1419 {
1420 return(false);
1421 }
1422 else
1423 {
1424 return(true);
1425 }
1426
1427}
1428
1429
1430inline void ISTable::AppendToAndDelimit(std::string& to,
1431 const std::string& appending)
1432{
1433
1434 to += appending;
1435 // VLAD HARDCODED CONST
1436 to += " ";
1437
1438}
1439
1440
1441inline void ISTable::SetModified(const bool modified)
1442{
1443 _modified = modified;
1444}
1445
1446
1448{
1449 return _modified;
1450}
1451
1452
1453inline const std::string& ISTable::GetName() const
1454{
1455 return(_name);
1456}
1457
1458
1459inline unsigned int ISTable::GetNumRows() const
1460{
1461 return(_numRows);
1462}
1463
1464
1465inline unsigned int ISTable::GetNumColumns() const
1466{
1467 return(_colNames.size());
1468}
1469
1470
1472{
1473 return(_colCaseSense);
1474}
1475
1476
1477#endif // ISTABLE_H
std::multimap< std::string, unsigned int, StringLess > tIndex
Definition ISTable.h:31
std::ostream & operator<<(std::ostream &out, const ISTable &isTable)
Header file for ITTable class.
eCompareType
Definition GenString.h:27
@ eCASE_SENSITIVE
Definition GenString.h:28
Public class that represents a two-dimensional table of strings.
Definition ISTable.h:54
void SetSerializer(Serializer *ser)
int GetObject(UInt32 index, Serializer *ser)
static const unsigned char W_SPACE_SENSE
Definition ISTable.h:119
bool IsColumnPresent(const std::string &colName)
void InsertColumn(const std::string &colName, const std::string &afColName, const std::vector< std::string > &col=std::vector< std::string >())
const std::string & operator()(const unsigned int rowIndex, const std::string &colName) const
void DeleteKey()
void AddColumn(const std::string &colName, const std::vector< std::string > &col=std::vector< std::string >())
bool GetModified()
Definition ISTable.h:1447
ISTable & operator=(const ISTable &inTable)
static const eSearchType eEQUAL
Definition ISTable.h:79
void SetModified(const bool modified)
Definition ISTable.h:1441
static const unsigned char CASE_INSENSE
Definition ISTable.h:117
static void SetIntersect(const std::vector< unsigned int > &a, const std::vector< unsigned int > &b, std::vector< unsigned int > &ret)
void RenameColumn(const std::string &oldColName, const std::string &newColName)
static const eSearchType eGREATER_THAN
Definition ISTable.h:82
void SetFlags(const std::string &colName, const unsigned char flags)
void Search(std::vector< unsigned int > &res, const std::string &target, const std::string &colName, const unsigned int fromRowIndex=0, const eSearchDir searchDir=eFORWARD, const eSearchType searchType=eEQUAL)
static const eOrientation eCOLUMN_WISE
Definition ISTable.h:58
void GetColumn(std::vector< std::string > &col, const std::string &colName, const unsigned int fromRowIndex, unsigned int toRowIndex)
ISTable(const std::string &name, eOrientation orient, const Char::eCompareType colCaseSense=Char::eCASE_SENSITIVE)
unsigned int GetLastRowIndex()
Definition ISTable.h:1397
void CreateKey(const std::vector< std::string > &colNames)
static const eSearchDir eFORWARD
Definition ISTable.h:99
void Read(unsigned int indexInFile)
static const unsigned char DT_STRING_VAL
Definition ISTable.h:110
static const eOrientation eROW_WISE
Definition ISTable.h:59
ITTable::eOrientation eOrientation
Definition ISTable.h:56
unsigned int FindFirst(const std::vector< std::string > &targets, const std::vector< std::string > &colNames, const std::string &indexName=std::string())
void FindDuplicateRows(std::vector< std::pair< unsigned int, unsigned int > > &duplRows, const std::vector< std::string > &colNames, const bool keepDuplRows, const eSearchDir searchDir=eFORWARD)
void RebuildIndices()
const std::vector< std::string > & GetColumnNames() const
static const unsigned char DT_STRING
Definition ISTable.h:124
void UpdateIndex(const std::string &indexName, const unsigned int rowIndex)
void FillRow(const unsigned int rowIndex, const std::vector< std::string > &row)
static const eSearchType eGREATER_THAN_OR_EQUAL
Definition ISTable.h:83
ISTable(const ISTable &inTable)
unsigned char GetDataType(const std::string &colName)
eTableDiff operator==(ISTable &inTable)
static const eSearchType eLESS_THAN_OR_EQUAL
Definition ISTable.h:81
void DeleteRow(const unsigned int rowIndex)
void GetColumn(std::vector< std::string > &col, const std::string &colName)
bool PrintDiff(ISTable &inTable)
Char::eCompareType GetColCaseSense() const
Definition ISTable.h:1471
void RebuildIndex(const std::string &indexName)
unsigned int GetNumColumns() const
Definition ISTable.h:1465
int Write()
void FillColumn(const std::string &colName, const std::vector< std::string > &col)
void Search(std::vector< unsigned int > &res, const std::vector< std::string > &targets, const std::vector< std::string > &colNames, const unsigned int fromRowIndex=0, const eSearchDir searchDir=eFORWARD, const eSearchType searchType=eEQUAL, const std::string &indexName=std::string())
const std::string & GetName() const
Definition ISTable.h:1453
int WriteObject(Serializer *ser, int &size)
static ISTable * Merge(ISTable &firstTable, ISTable &secondTable, unsigned int typeOfMerge=0)
void GetRow(std::vector< std::string > &row, const unsigned int rowIndex, const std::string &fromColName=std::string(), const std::string &toColName=std::string())
unsigned int InsertRow(const unsigned int atRowIndex, const std::vector< std::string > &row=std::vector< std::string >())
void ClearRow(const unsigned int rowIndex)
void CreateIndex(const std::string &indexName, const std::vector< std::string > &colNames, const unsigned int unique=0)
void UpdateCell(const unsigned int rowIndex, const std::string &colName, const std::string &value)
void DeleteRows(const std::vector< unsigned int > &rows)
ISTable(eOrientation orient, const Char::eCompareType colCaseSense=Char::eCASE_SENSITIVE)
static const unsigned char DT_INTEGER
Definition ISTable.h:126
ISTable(const std::string &name, const Char::eCompareType colCaseSense=Char::eCASE_SENSITIVE)
void ClearColumn(const std::string &colName)
static const unsigned char W_SPACE_INSENSE
Definition ISTable.h:122
static const eSearchType eLESS_THAN
Definition ISTable.h:80
static const eSearchDir eBACKWARD
Definition ISTable.h:100
unsigned int GetNumRows() const
Definition ISTable.h:1459
static void SetUnion(const std::vector< unsigned int > &a, const std::vector< unsigned int > &b, std::vector< unsigned int > &ret)
eTableDiff
Definition ISTable.h:62
@ eMISSING
Definition ISTable.h:72
@ eNONE
Definition ISTable.h:63
@ eEXTRA
Definition ISTable.h:74
@ eLESS_COLS
Definition ISTable.h:66
@ eCELLS
Definition ISTable.h:70
@ eLESS_ROWS
Definition ISTable.h:69
@ eMORE_ROWS
Definition ISTable.h:68
@ eCOL_NAMES
Definition ISTable.h:67
@ eMORE_COLS
Definition ISTable.h:65
@ eCASE_SENSE
Definition ISTable.h:64
void SetName(const std::string &name)
void GetColumnsIndices(std::vector< unsigned int > &colIndices, const std::vector< std::string > &colNames)
const std::vector< std::string > & GetRow(const unsigned int rowIndex)
ITTable::eSearchDir eSearchDir
Definition ISTable.h:97
static const unsigned char CASE_SENSE
Definition ISTable.h:115
unsigned int GetNumIndices()
Definition ISTable.h:1405
void GetColumn(std::vector< std::string > &col, const std::string &colName, const std::string &indexName)
unsigned int AddRow(const std::vector< std::string > &row=std::vector< std::string >())
ITTable::eSearchType eSearchType
Definition ISTable.h:77
ISTable(const Char::eCompareType colCaseSense=Char::eCASE_SENSITIVE)
void DeleteIndex(const std::string &indexName)
bool IndexExists(const std::string &indexName)
Definition ISTable.h:1413
static const unsigned char DT_INTEGER_VAL
Definition ISTable.h:111
void DeleteColumn(const std::string &colName)
void GetColumn(std::vector< std::string > &col, const std::string &colName, const std::vector< unsigned int > &rowIndex)
eSearchType
Definition ITTable.h:61
@ eLESS_THAN_OR_EQUAL
Definition ITTable.h:64
@ eLESS_THAN
Definition ITTable.h:63
@ eGREATER_THAN
Definition ITTable.h:65
@ eEQUAL
Definition ITTable.h:62
@ eGREATER_THAN_OR_EQUAL
Definition ITTable.h:66
eSearchDir
Definition ITTable.h:70
@ eFORWARD
Definition ITTable.h:71
@ eBACKWARD
Definition ITTable.h:72
eOrientation
Definition ITTable.h:55
@ eROW_WISE
Definition ITTable.h:57
@ eCOLUMN_WISE
Definition ITTable.h:56
Definition Serializer.h:44
Definition mapped_vector.h:22
unsigned int size() const
Definition mapped_vector.C:79
unsigned int UInt32
Definition rcsb_types.h:15