Package helpers :: Module SpotDuplicateMFs
[hide private]

Source Code for Module helpers.SpotDuplicateMFs

  1  #! /usr/bin/env python 
  2  #------------------------------------------------------------------------------ 
  3  #$Id: SpotDuplicateMFs.py 6773 2010-04-06 15:55:21Z RossCollins $ 
  4  """ 
  5     Identifies properties of duplicate multiframe entries in the database. 
  6   
  7     @author: R.S. Collins 
  8     @org:    WFAU, IfA, University of Edinburgh 
  9  """ 
 10  #------------------------------------------------------------------------------ 
 11  import os 
 12   
 13  from wsatools.CLI      import CLI 
 14  from wsatools.FitsToDb import DbUpdater 
 15  from wsatools.Logger   import Logger 
 16  #------------------------------------------------------------------------------ 
 17   
class DuplicateSpotter(DbUpdater):
    """
    Identifies properties of duplicate multiframe entries in the database.

    Two database records count as duplicates when their file paths share the
    same "<parent directory>/<file name>" tail. For every duplicate found the
    differing Multiframe columns are printed and the multiframeID stored in
    the FITS header is compared with the one held in the database.

    """
    # Number of processed files whose FITS header WSA_MFID equals the
    # multiframeID supplied for that file.
    numConsistentMFIDS = 0

    # The base-class record selection with the clause that excludes
    # 'PixelFileNoLongerAvailable:' file names removed.
    # NOTE(review): presumably so that deprecated entries are also compared -
    # confirm against DbUpdater.
    _recordSet = str(DbUpdater._recordSet).replace(
        "AND fileName NOT LIKE 'PixelFileNoLongerAvailable:%' ", '')

    #--------------------------------------------------------------------------

    @staticmethod
    def _getFileID(filePathName):
        """
        Reduces a full path to the "<parent directory>/<file name>" key used
        to recognise duplicate entries.

        @param filePathName: Full path to the FITS file.
        @type  filePathName: str

        @return: Parent directory name joined to the file name.
        @rtype:  str

        """
        filePath, fileName = os.path.split(filePathName)
        return os.path.join(os.path.basename(filePath), fileName)

    #--------------------------------------------------------------------------

    def _queryFileName(self, mfid):
        """
        Looks up the fileName recorded in table Multiframe for a multiframeID.

        @param mfid: MultiframeID in database.
        @type  mfid: int

        @return: First result of the fileName query for this multiframeID.

        """
        return self.archive.query('fileName', 'Multiframe',
                                  'multiframeID=%s' % mfid, firstOnly=True)

    #--------------------------------------------------------------------------

    def _prepare(self):
        """
        Find duplicate multiframes.

        Fills self._mfidDict with a flat list of every multiframeID seen for
        each file ID, and self._recordList with the (filePathName, mfid) pair
        of every record whose file ID had already been seen.

        @raise CuError: If no duplicates are found.

        """
        Logger.addMessage('Finding duplicates...')

        self._recordTable = DbUpdater._recordTable
        self._mfidDict = {}
        self._recordList = []
        for filePathName, mfid in DbUpdater._getRecordsList(self):
            fileID = self._getFileID(filePathName)
            # Always keep a flat list of IDs per file, so three or more
            # entries for one file do not produce a nested list that breaks
            # the min()/max() calls in updateDb().
            mfids = self._mfidDict.setdefault(fileID, [])
            if mfids:
                # This file ID has been seen before: it is a duplicate entry.
                self._recordList.append((filePathName, mfid))
            mfids.append(mfid)

        if not self._recordList:
            raise DuplicateSpotter.CuError("No duplicates found!")

    #--------------------------------------------------------------------------

    def updateDb(self, mfid, fitsFile, filePathName):
        """
        Analyses FITS files.

        Prints the Multiframe columns that differ between the lowest and
        highest multiframeID recorded for this file, reports the ID stored in
        the FITS header, and counts files whose header ID matches the
        database ID supplied.

        @param mfid:         MultiframeID in database.
        @type  mfid:         int
        @param fitsFile:     Open FITS file containing the data for this mfid.
        @type  fitsFile:     pyfits.HDUList
        @param filePathName: Full path to the FITS file.
        @type  filePathName: str

        """
        dupMFIDs = self._mfidDict[self._getFileID(filePathName)]
        lowMFID = min(dupMFIDs)
        highMFID = max(dupMFIDs)

        names = self.archive.queryColumnNames('Multiframe')
        dupA = self.archive.query('*', 'Multiframe',
                                  'multiframeID=%s' % lowMFID, firstOnly=True)
        dupB = self.archive.query('*', 'Multiframe',
                                  'multiframeID=%s' % highMFID, firstOnly=True)

        # The first column is skipped in the comparison and used only to
        # label the two rows in the report below.
        details = [(name, eleA, eleB)
                   for name, eleA, eleB in zip(names[1:], dupA[1:], dupB[1:])
                   if eleA != eleB]
        if details:
            print("----------------------------")
            print("Discrepencies for %s and %s" % (dupA[0], dupB[0]))
            for detail in details:
                print(' '.join(map(str, detail)))

        # just looking in first extension for now - they should all be the same
        fitsHdr = fitsFile[0].header
        fitsMFID = fitsHdr['WSA_MFID']
        if fitsMFID != highMFID:
            print("Dodgy fits mfid: %s" % fitsMFID)

        lowFileName = self._queryFileName(lowMFID)
        if 'PixelFile' not in lowFileName:
            print("fitsMFID: %s" % fitsMFID)
            print("wsaMFID: %s %s %s" % (lowMFID, "fileName:", lowFileName))
            print("wsaMFID: %s %s %s" %
                  (highMFID, "fileName:", self._queryFileName(highMFID)))

            # NOTE(review): the nesting of this loop was reconstructed from a
            # listing that had lost its indentation - confirm that it belongs
            # inside this branch rather than at method level.
            for dupemfid in dupMFIDs:
                print("| %7s | %7s | %40s |" %
                      (fitsMFID, dupemfid, self._queryFileName(dupemfid)))

        if mfid != int(fitsMFID):
            Logger.addMessage("File: %s" % filePathName)
            Logger.addMessage("Database MFID: %s" % mfid)
            Logger.addMessage("File MFID : %s" % fitsMFID)
        else:
            self.numConsistentMFIDS += 1
#------------------------------------------------------------------------------
# Entry point for script.

# Allow module to be imported as well as executed from the command line
if __name__ == '__main__':
    # Define command-line interface settings: drop these entries from the
    # default program options and arguments before the CLI is built.
    for optionName in ('user', 'test', 'verbose', 'curator'):
        CLI.progOpts.remove(optionName)
    CLI.progArgs.remove('comment')

    cli = CLI(DuplicateSpotter, "$Revision: 1.14 $")
    Logger.addMessage(cli.getProgDetails())

    taskComment = ("Identifying properties of duplicate "
                   "multiframe entries in the database.")
    task = DuplicateSpotter(comment=taskComment,
                            database=cli.getArg("database"))
    task.run()

    # Report how many files carried the same multiframeID as the database.
    summary = "Files with multiframeIDs consistent with DB: %s"
    Logger.addMessage(summary % task.numConsistentMFIDS)

#------------------------------------------------------------------------------
# Change log:
#
# 5-Dec-2006, RSC: Original version.