00001
00002
00003
00004
00005
00006 #include <cstdlib>
00007 #include <iostream>
00008 #include <string>
00009 #include <limits>
00010
00011 #include "Options.h"
00012 #include "Constants.h"
00013 #include "Logger.h"
00014 #include "MyException.h"
00015 #include "SchemaException.h"
00016 #include "StringOps.h"
00017
00018 template<typename DataType>
00019 void Table<DataType>::checkData(bool aThrowExceptions)
00020 {
00021 using namespace std;
00022 mChecked = true;
00023 Options options;
00024 Logger log;
00025
00026
00027 for (unsigned colNo = 0; colNo < mDataPtr->qNumCols(); ++colNo)
00028 {
00029
00030 string format = mInfoPtr->getFormat(colNo);
00031 if (format == "bigint")
00032 {
00033 bigintTest(*mDataPtr, colNo);
00034 }
00035
00036
00037 unsigned varcharLen = 0;
00038 if (format.find("varchar") != string::npos)
00039 {
00040 int posa = format.find("(") + 1;
00041 int posb = format.rfind(")");
00042 varcharLen = atoi(format.substr(posa, posb - posa).c_str());
00043 }
00044 bool isVarchar = (varcharLen > 0);
00045 bool isDatetime = (format == "datetime");
00046
00047
00048 int numDefaults = 0;
00049
00050
00051 string defValue = mInfoPtr->getDefault(colNo);
00052 bool existsDefault = !defValue.empty();
00053
00054
00055 for (unsigned rowNo = 0; rowNo < mDataPtr->qNumRows(); ++rowNo)
00056 {
00057
00058 if (!mDataPtr->isDefined(colNo, rowNo))
00059 {
00060
00061
00062 if (existsDefault)
00063 {
00064 mDataPtr->assign(colNo, rowNo, defValue);
00065 ++numDefaults;
00066 }
00067 else
00068
00069
00070
00071 {
00072 string mess = mInfoPtr->getTableName();
00073 mess += ": Undefined value but no default given. Assigning: ";
00074 mess += mInfoPtr->getName(colNo);
00075 mess += " for row " + StringOps::NumToString(rowNo);
00076 log.addError(mess);
00077 mChecked = false;
00078 if (aThrowExceptions) throw SchemaException(mess);
00079 }
00080 }
00081 else
00082 {
00083 if (isDatetime)
00084 {
00085
00086 datetimeTest(*mDataPtr, colNo, rowNo);
00087 }
00088 else if (isVarchar)
00089 {
00090
00091 varcharTest(*mDataPtr, colNo, rowNo, varcharLen);
00092 }
00093
00094 if (isInf(mDataPtr->at(colNo, rowNo)))
00095 {
00096 string mess = mInfoPtr->getTableName() + " ";
00097 mess += mInfoPtr->getName(colNo) + " ";
00098 mess += StringOps::NumToString(colNo) + " ";
00099 mess += StringOps::NumToString(rowNo) + " : ";
00100 mess += StringOps::NumToString(mDataPtr->at(colNo, rowNo)) + " : ";
00101
00102 if (StringOps::ToLower(options.getArchive()) == "vsa" or
00103 mInfoPtr->getTableName().find("Astrometry") != std::string::npos)
00104 {
00105 mess += "inf value found, replaced with dbl_max";
00106 if (format == "real")
00107 {
00108 mDataPtr->assign(colNo, rowNo, numeric_limits<float>::max( )-1);
00109 }
00110 else if (format == "float")
00111 {
00112 mDataPtr->assign(colNo, rowNo, numeric_limits<double>::max( )-1);
00113 }
00114 else
00115 {
00116 mHasNaNs = true;
00117 if (aThrowExceptions) throw MyException(mess);
00118 }
00119 } else {
00120 mess += "NaN value found";
00121 mHasNaNs = true;
00122 if (aThrowExceptions) throw MyException(mess);
00123 }
00124 log.addError(mess);
00125 }
00126
00127 else if (isNaN(mDataPtr->at(colNo, rowNo)))
00128 {
00129 string mess = mInfoPtr->getTableName() + " ";
00130 mess += mInfoPtr->getName(colNo) + " ";
00131 mess += StringOps::NumToString(colNo) + " ";
00132 mess += StringOps::NumToString(rowNo) + " : ";
00133 mess += StringOps::NumToString(mDataPtr->at(colNo, rowNo)) + " : ";
00134 mess += "NaN value found";
00135 log.addError(mess);
00136 mHasNaNs = true;
00137 if (aThrowExceptions) throw MyException(mess);
00138 }
00139 }
00140 }
00141
00142
00143 if (existsDefault && numDefaults > 0)
00144 {
00145 string mess = mInfoPtr->getTableName() + " ";
00146 mess += mInfoPtr->getName(colNo) + " : ";
00147 mess += " used default value of ";
00148 mess += defValue;
00149 mess += " on " + StringOps::NumToString(numDefaults) + " rows";
00150 log.addWarning(mess);
00151 }
00152 }
00153 }
00154
00157 template<typename DataType>
00158 void Table<DataType>::bigintTest(TableData<Metadata>& aData, int aCol)
00159 {
00160 if (aData.isDefined(aCol, 0) &&
00161 aData.at(aCol, 0).find_first_not_of("1234567890.") != std::string::npos)
00162 {
00163 aData.setundef(aCol, 0);
00164 }
00165 }
00166
00169 template<typename DataType>
00170 void Table<DataType>::varcharTest(TableData<Metadata>& aData,
00171 int aCol,
00172 int aRow,
00173 unsigned aLength)
00174 {
00175 Metadata value = aData.at(aCol, aRow);
00176
00177 if (value.size() > aLength)
00178 {
00179 aData.assign(aCol, aRow,
00180 value.substr(0, aLength));
00181
00182 std::string mess = mInfoPtr->getTableName() + " ";
00183 mess += mInfoPtr->getName(aCol) + " ";
00184 mess += StringOps::NumToString(aCol) + " ";
00185 mess += StringOps::NumToString(aRow) + " : ";
00186 mess += "String too long for " + mInfoPtr->getFormat(aCol) + ", ";
00187 mess += value + " truncated";
00188 Logger log;
00189 log.addWarning(mess);
00190 }
00191 }
00192
00193 template<typename DataType>
00194 void Table<DataType>::datetimeTest(TableData<Metadata>& aData,
00195 int aCol,
00196 int aRow)
00197 {
00198 Metadata value = aData.at(aCol, aRow);
00199
00200 if (value[0] == '0')
00201 {
00202 aData.assign(aCol, aRow, DefaultValues::datetimedef);
00203 }
00204 else
00205 {
00206 std::string::size_type colonIndex = value.find(':');
00207 if (colonIndex != std::string::npos &&
00208 value.find(':', colonIndex+1) == std::string::npos)
00209 {
00210 aData.assign(aCol, aRow, value.append(":00"));
00211 }
00212 }
00213 }
00214
00215 template<typename DataType>
00216 bool Table<DataType>::checkRange()
00217 {
00218 using namespace std;
00219 Logger log;
00220 bool checked = true;
00221 bool isunique = true;
00222 string messUnique;
00223 string extNum, oldExtNum;
00224 extNum = "0";
00225 oldExtNum = "0";
00226 unsigned counter = 0;
00227
00228
00229 for (unsigned colNo = 0; colNo < mDataPtr->qNumCols(); ++colNo)
00230 {
00231
00232 string rangeValue = mInfoPtr->getRange(colNo);
00233 bool isRange = !rangeValue.empty();
00234
00235
00236 string discrValue = mInfoPtr->getDiscrValues(colNo);
00237 bool isDiscr = !discrValue.empty();
00238
00239
00240 bool isSeqNum = (mInfoPtr->getName(colNo) == ExpectNames::seqNum);
00241
00242
00243
00244 unsigned maxRows = mDataPtr -> qNumRows();
00245 if (mInfoPtr->isDetection())
00246 {
00247 maxRows -= mDataPtr -> qNumExts();
00248 }
00249
00250 for (unsigned rowNo = 0; rowNo < maxRows; ++rowNo)
00251 {
00252
00253
00254 bool isgdValue=true;
00255 if (isRange && mDataPtr->isDefined(colNo, rowNo))
00256 {
00257 StringMap range;
00258 StringOps::split(rangeValue, ',', range);
00259 isgdValue = mDataPtr->testRange(colNo, rowNo, range);
00260 }
00261 else if (isDiscr && mDataPtr->isDefined(colNo, rowNo))
00262 {
00263 StringMap values;
00264 StringOps::split(discrValue, ',', values);
00265 isgdValue = mDataPtr->testDiscrValues(colNo, rowNo, values);
00266 }
00267 else if (isSeqNum)
00268 {
00269 string sqnval;
00270 mDataPtr->value(colNo, rowNo, sqnval);
00271 oldExtNum = extNum;
00272 mDataPtr->value(mInfoPtr->getAttNo(ExpectNames::extNum), rowNo, extNum);
00273 if (extNum != oldExtNum)
00274 {
00275 counter = 0;
00276 }
00277 counter += 1;
00278 if (atoi(sqnval.c_str()) > 0)
00279 {
00280 isgdValue = (counter == (unsigned)atoi(sqnval.c_str()));
00281 }
00282 }
00283 else
00284 {
00285 isgdValue = true;
00286 }
00287
00288
00289 if (!isgdValue && isRange)
00290 {
00291 string actval;
00292 mDataPtr->value(colNo, rowNo, actval);
00293 string mess = mInfoPtr->getTableName() + " ";
00294 mess += mInfoPtr->getName(colNo) + " ";
00295 mess += "Range = " + rangeValue + " ";
00296 mess += "Value = "+ actval +" : ";
00297 mess += "Value outside logical range";
00298 log.addError(mess);
00299 checked = false;
00300 }
00301 else if (!isgdValue && isDiscr)
00302 {
00303 string actval;
00304 mDataPtr->value(colNo, rowNo, actval);
00305 string mess = mInfoPtr->getTableName() + " ";
00306 mess += mInfoPtr->getName(colNo) + " ";
00307 mess += "Discrete Values : " + discrValue + " ";
00308 mess += "Value = " + actval+ " : ";
00309 mess += "Value outside logical range";
00310 log.addError(mess);
00311 checked = false;
00312 }
00313 else if (!isgdValue && isSeqNum)
00314 {
00315 messUnique = mInfoPtr->getTableName() + " ";
00316 messUnique += extNum + ",";
00317 messUnique += mInfoPtr->getName(colNo) + " ";
00318 messUnique += "not linear/unique";
00319
00320 checked = false;
00321 isunique = false;
00322 }
00323 }
00324 }
00325 if (!checked and !isunique)
00326 {
00327 log.addError(messUnique);
00328 }
00329
00330 string mess = mInfoPtr->getTableName() + " ";
00331 mess += " Range checking complete ";
00332 log.addMessage(mess);
00333 return checked;
00334 }
00335
00336 template<typename DataType>
00337 void Table<DataType>::testDump()
00338 {
00339 using namespace std;
00340 Logger log;
00341 log.addMessage("Dumping data for table: " + mInfoPtr->getTableName());
00342
00343 int numRows = mDataPtr->qNumRows();
00344 int numCols = mDataPtr->qNumCols();
00345
00346 if (numRows > 10) numRows = 10;
00347
00348 for (int colNo = 0; colNo < numCols; ++colNo)
00349 {
00350 cout << mInfoPtr->getName(colNo) << " = ";
00351
00352 for (int rowNo = 0; rowNo < numRows; ++rowNo)
00353 {
00354 DataType value = mDataPtr->at(colNo, rowNo);
00355 cout << value << " ";
00356 }
00357 cout << endl;
00358 }
00359 }
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377