00001 
00002 
00003 
00004 
00005 
00006 #include <cstdlib>
00007 #include <iostream>
00008 #include <string>
00009 #include <limits>
00010 
00011 #include "Options.h"
00012 #include "Constants.h"
00013 #include "Logger.h"
00014 #include "MyException.h"
00015 #include "SchemaException.h"
00016 #include "StringOps.h"
00017 
00018 template<typename DataType>
00019 void Table<DataType>::checkData(bool aThrowExceptions)
00020 {
00021   using namespace std;
00022   mChecked = true;
00023   Options options;
00024   Logger log;
00025 
00026   
00027   for (unsigned colNo = 0; colNo < mDataPtr->qNumCols(); ++colNo)
00028   {
00029     
00030     string format = mInfoPtr->getFormat(colNo);
00031     if (format == "bigint")
00032     {
00033       bigintTest(*mDataPtr, colNo);
00034     }
00035 
00036     
00037     unsigned varcharLen = 0;
00038     if (format.find("varchar") != string::npos)
00039     {
00040       int posa = format.find("(") + 1;
00041       int posb = format.rfind(")");
00042       varcharLen = atoi(format.substr(posa, posb - posa).c_str());
00043     }
00044     bool isVarchar = (varcharLen > 0);
00045     bool isDatetime = (format == "datetime");
00046 
00047     
00048     int numDefaults = 0;
00049 
00050     
00051     string defValue = mInfoPtr->getDefault(colNo);
00052     bool existsDefault = !defValue.empty();
00053 
00054     
00055     for (unsigned rowNo = 0; rowNo < mDataPtr->qNumRows(); ++rowNo)
00056     {
00057       
00058       if (!mDataPtr->isDefined(colNo, rowNo))
00059       {
00060         
00061         
00062         if (existsDefault)
00063         {
00064           mDataPtr->assign(colNo, rowNo, defValue);
00065           ++numDefaults;
00066         }
00067         else
00068         
00069         
00070         
00071         {
00072           string mess = mInfoPtr->getTableName();
00073           mess += ": Undefined value but no default given. Assigning: ";
00074           mess += mInfoPtr->getName(colNo);
00075           mess += " for row " + StringOps::NumToString(rowNo);
00076           log.addError(mess);
00077           mChecked = false;
00078           if (aThrowExceptions) throw SchemaException(mess);
00079         }
00080       }
00081       else
00082       {
00083         if (isDatetime)
00084         {
00085           
00086           datetimeTest(*mDataPtr, colNo, rowNo);
00087         }
00088         else if (isVarchar)
00089         {
00090           
00091           varcharTest(*mDataPtr, colNo, rowNo, varcharLen);
00092         }
00093         
00094         if (isInf(mDataPtr->at(colNo, rowNo)))
00095         {
00096           string mess = mInfoPtr->getTableName() + " ";
00097           mess += mInfoPtr->getName(colNo) + " ";
00098           mess += StringOps::NumToString(colNo) + " ";
00099           mess += StringOps::NumToString(rowNo) + " : ";
00100           mess += StringOps::NumToString(mDataPtr->at(colNo, rowNo)) + " : ";
00101 
00102           if (StringOps::ToLower(options.getArchive()) == "vsa" or
00103               mInfoPtr->getTableName().find("Astrometry") != std::string::npos)
00104           {
00105             mess += "inf value found, replaced with dbl_max";
00106             if (format == "real")
00107             {
00108               mDataPtr->assign(colNo, rowNo, numeric_limits<float>::max( )-1);
00109             }
00110             else if (format == "float")
00111             {
00112               mDataPtr->assign(colNo, rowNo, numeric_limits<double>::max( )-1);
00113             }
00114             else
00115             {
00116               mHasNaNs = true;
00117               if (aThrowExceptions) throw MyException(mess);
00118             }
00119           } else {
00120             mess += "NaN value found";
00121             mHasNaNs = true;
00122             if (aThrowExceptions) throw MyException(mess);
00123           }
00124           log.addError(mess);
00125         }
00126         
00127         else if (isNaN(mDataPtr->at(colNo, rowNo)))
00128         {
00129           string mess = mInfoPtr->getTableName() + " ";
00130           mess += mInfoPtr->getName(colNo) + " ";
00131           mess += StringOps::NumToString(colNo) + " ";
00132           mess += StringOps::NumToString(rowNo) + " : ";
00133           mess += StringOps::NumToString(mDataPtr->at(colNo, rowNo)) + " : ";
00134           mess += "NaN value found";
00135           log.addError(mess);
00136           mHasNaNs = true;
00137           if (aThrowExceptions) throw MyException(mess);
00138         }
00139       }
00140     } 
00141 
00142     
00143     if (existsDefault && numDefaults > 0)
00144     {
00145       string mess = mInfoPtr->getTableName() + " ";
00146       mess += mInfoPtr->getName(colNo) + " : ";
00147       mess += " used default value of ";
00148       mess += defValue;
00149       mess += " on " + StringOps::NumToString(numDefaults) + " rows";
00150       log.addWarning(mess);
00151     }
00152   } 
00153 } 
00154 
00157 template<typename DataType>
00158 void Table<DataType>::bigintTest(TableData<Metadata>& aData, int aCol)
00159 {
00160   if (aData.isDefined(aCol, 0) &&
00161       aData.at(aCol, 0).find_first_not_of("1234567890.") != std::string::npos)
00162   {
00163     aData.setundef(aCol, 0);
00164   }
00165 }
00166 
00169 template<typename DataType>
00170 void Table<DataType>::varcharTest(TableData<Metadata>& aData,
00171                                   int aCol,
00172                                   int aRow,
00173                                   unsigned aLength)
00174 {
00175   Metadata value = aData.at(aCol, aRow);
00176 
00177   if (value.size() > aLength)
00178   {
00179     aData.assign(aCol, aRow,
00180                  value.substr(0, aLength));
00181 
00182     std::string mess = mInfoPtr->getTableName() + " ";
00183     mess += mInfoPtr->getName(aCol) + " ";
00184     mess += StringOps::NumToString(aCol) + " ";
00185     mess += StringOps::NumToString(aRow) + " : ";
00186     mess += "String too long for " + mInfoPtr->getFormat(aCol) + ", ";
00187     mess += value + " truncated";
00188     Logger log;
00189     log.addWarning(mess);
00190   }
00191 } 
00192 
00193 template<typename DataType>
00194 void Table<DataType>::datetimeTest(TableData<Metadata>& aData,
00195                                    int aCol,
00196                                    int aRow)
00197 {
00198   Metadata value = aData.at(aCol, aRow);
00199 
00200   if (value[0] == '0')
00201   {
00202     aData.assign(aCol, aRow, DefaultValues::datetimedef);
00203   }
00204   else
00205   {
00206     std::string::size_type colonIndex = value.find(':');
00207     if (colonIndex != std::string::npos &&
00208         value.find(':', colonIndex+1) == std::string::npos)
00209     {
00210       aData.assign(aCol, aRow, value.append(":00"));
00211     }
00212   }
00213 }
00214 
00215 template<typename DataType>
00216 bool Table<DataType>::checkRange()
00217 {
00218   using namespace std;
00219   Logger log;
00220   bool checked = true;
00221   bool isunique = true;
00222   string messUnique;
00223   string extNum, oldExtNum;
00224   extNum = "0";
00225   oldExtNum = "0";
00226   unsigned counter = 0;
00227 
00228   
00229   for (unsigned colNo = 0; colNo < mDataPtr->qNumCols(); ++colNo)
00230   {
00231     
00232     string rangeValue = mInfoPtr->getRange(colNo);
00233     bool isRange = !rangeValue.empty();
00234 
00235     
00236     string discrValue = mInfoPtr->getDiscrValues(colNo);
00237     bool isDiscr = !discrValue.empty();
00238 
00239     
00240     bool isSeqNum = (mInfoPtr->getName(colNo) == ExpectNames::seqNum);
00241     
00242 
00243     
00244     unsigned maxRows = mDataPtr -> qNumRows();
00245     if (mInfoPtr->isDetection())
00246     {
00247       maxRows -= mDataPtr -> qNumExts();
00248     }
00249 
00250     for (unsigned rowNo = 0; rowNo < maxRows; ++rowNo)
00251     {
00252       
00253       
00254       bool isgdValue=true;
00255       if (isRange && mDataPtr->isDefined(colNo, rowNo))
00256       {
00257         StringMap range;
00258         StringOps::split(rangeValue, ',', range);
00259         isgdValue = mDataPtr->testRange(colNo, rowNo, range);
00260       }
00261       else if (isDiscr && mDataPtr->isDefined(colNo, rowNo))
00262       {
00263         StringMap values;
00264         StringOps::split(discrValue, ',', values);
00265         isgdValue = mDataPtr->testDiscrValues(colNo, rowNo, values);
00266       }
00267       else if (isSeqNum)
00268       {
00269         string sqnval;
00270         mDataPtr->value(colNo, rowNo, sqnval);
00271         oldExtNum = extNum;
00272         mDataPtr->value(mInfoPtr->getAttNo(ExpectNames::extNum), rowNo, extNum);
00273         if (extNum != oldExtNum)
00274         {
00275           counter = 0;
00276         }
00277         counter += 1;
00278         if (atoi(sqnval.c_str()) > 0)
00279         {
00280           isgdValue = (counter == (unsigned)atoi(sqnval.c_str()));
00281         }
00282       }
00283       else
00284       {
00285         isgdValue = true;
00286       }
00287       
00288       
00289       if (!isgdValue && isRange)  
00290       {
00291         string actval;
00292         mDataPtr->value(colNo, rowNo, actval);
00293         string mess = mInfoPtr->getTableName() + " ";
00294         mess += mInfoPtr->getName(colNo) + " ";
00295         mess += "Range = " + rangeValue + " ";
00296         mess += "Value = "+ actval +" : ";
00297         mess += "Value outside logical range";
00298         log.addError(mess);
00299         checked = false;
00300       }
00301       else if (!isgdValue && isDiscr)  
00302       {
00303         string actval;
00304         mDataPtr->value(colNo, rowNo, actval);
00305         string mess = mInfoPtr->getTableName() + " ";
00306         mess += mInfoPtr->getName(colNo) + " ";
00307         mess += "Discrete Values : " + discrValue + " ";
00308         mess += "Value = " + actval+ " : ";
00309         mess += "Value outside logical range";
00310         log.addError(mess);
00311         checked = false;
00312       }
00313       else if (!isgdValue && isSeqNum)
00314       {
00315         messUnique = mInfoPtr->getTableName() + " ";
00316         messUnique += extNum + ",";
00317         messUnique += mInfoPtr->getName(colNo) + " ";
00318         messUnique += "not linear/unique";
00319         
00320         checked = false;
00321         isunique = false;
00322       }
00323     } 
00324   } 
00325   if (!checked and !isunique)
00326   {
00327     log.addError(messUnique);
00328   }
00329   
00330   string mess = mInfoPtr->getTableName() + " ";
00331   mess += " Range checking complete ";
00332   log.addMessage(mess);
00333   return checked;
00334 }
00335 
00336 template<typename DataType>
00337 void Table<DataType>::testDump()
00338 {
00339   using namespace std;
00340   Logger log;
00341   log.addMessage("Dumping data for table: " + mInfoPtr->getTableName());
00342 
00343   int numRows = mDataPtr->qNumRows();
00344   int numCols = mDataPtr->qNumCols();
00345 
00346   if (numRows > 10) numRows = 10;
00347 
00348   for (int colNo = 0; colNo < numCols; ++colNo)
00349   {
00350     cout << mInfoPtr->getName(colNo) << " = ";
00351 
00352     for (int rowNo = 0; rowNo < numRows; ++rowNo)
00353     {
00354       DataType value = mDataPtr->at(colNo, rowNo);
00355       cout << value << " ";
00356     }
00357     cout << endl;
00358   }
00359 }
00360 
00361 
00362 
00363 
00364 
00365 
00366 
00367 
00368 
00369 
00370 
00371 
00372 
00373 
00374 
00375 
00376 
00377