1
2
3
4 """
5 Removes duplicates in a log file, but keeps the same order, so that
6 cu8 can be rerun without any problems.
7
8 @author: N.J. Cross
9 @org: WFAU, IfA, University of Edinburgh
10 """
11
12 import os
13 import sys
14
15 import wsatools.DbConnect.DbConstants as dbc
16
17
18
19
def removeDuplicates(lines):
    """
    Returns the log lines with repeated catalogue-file blocks removed.

    A block starts at a "header" line, i.e. one whose first word begins with
    "w200" and whose second word is "#1". It runs until the next line that
    starts with "w" and names a different file, or until the end of the log.
    When a file name has several headers only the block at its last header is
    kept, and the kept blocks are emitted in the order in which each name
    first appeared. Lines that fall outside every kept block (for example
    anything before the first header) are not copied.

    @param lines: Lines of the log file, as returned by readlines().
    @type  lines: list(str)

    @return: The lines to write to the de-duplicated log file.
    @rtype:  list(str)
    """
    lastHeaderIndex = {}
    orderedNames = []
    for index, line in enumerate(lines):
        words = line.split()
        # Blank or single-word lines cannot be headers; skipping them also
        # avoids indexing past the end of the word list.
        if len(words) < 2:
            continue
        name = words[0]
        if name[:4] == "w200" and words[1] == "#1":
            if name not in lastHeaderIndex:
                # Remember first-appearance order for the output.
                orderedNames.append(name)
            # Later headers overwrite earlier ones: the last block wins.
            lastHeaderIndex[name] = index

    newLines = []
    for name in orderedNames:
        for index in range(lastHeaderIndex[name], len(lines)):
            line = lines[index]
            # A line starting with "w" that names another file ends the block.
            if line.startswith("w") and line.split()[0] != name:
                break
            newLines.append(line)
    return newLines


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <logFilePath>" % sys.argv[0])

    logFilePath = sys.argv[1]
    # Output is written next to the input, with "_rmdup.log" replacing the
    # input file's extension.
    outLogFilePath = os.path.splitext(logFilePath)[0] + '_rmdup.log'

    with open(logFilePath) as logFile:
        lines = logFile.readlines()

    with open(outLogFilePath, 'w') as outLogFile:
        outLogFile.writelines(removeDuplicates(lines))
60
61
62
63
64
65