diff options
Diffstat (limited to 'extras')
| -rw-r--r-- | extras/gnfs-loganalyse.py | 152 | 
1 files changed, 138 insertions, 14 deletions
| diff --git a/extras/gnfs-loganalyse.py b/extras/gnfs-loganalyse.py index 1e0fde7f295..8ef45a4a5cc 100644 --- a/extras/gnfs-loganalyse.py +++ b/extras/gnfs-loganalyse.py @@ -24,6 +24,13 @@ import sys  class NFSRequest: +        def requestIsEntryOp (self): +                op = self.op +                if op == "CREATE" or op == "LOOKUP" or op == "REMOVE" or op == "LINK" or op == "RENAME" or op == "MKDIR" or op == "RMDIR" or op == "SYMLINK" or op == "MKNOD": +                        return 1 +                else: +                        return 0 +          def __init__ (self, logline, linecount):                  self.calllinecount = 0                  self.xid = "" @@ -31,8 +38,13 @@ class NFSRequest:                  self.opdata = ""                  self.replydata = ""                  self.replylinecount = 0 +                self.timestamp = "" +                self.entryname = "" +                self.gfid = "" +                self.replygfid = ""                  tokens = logline.strip ().split (" ") +                self.timestamp = tokens[0] + " " + tokens[1]                  if "XID:" not in tokens:                          return None @@ -47,6 +59,11 @@ class NFSRequest:                  opidx = tokens.index ("args:")                  self.op = tokens [opidx-1].strip (":")                  self.opdata = " ".join(tokens [opidx+1:]) +                if self.requestIsEntryOp (): +                        nameidx = tokens.index ("name:") +                        self.entryname = tokens[nameidx + 1].strip (",") +                gfididx = tokens.index ("gfid") +                self.gfid = tokens[gfididx +1].strip(",")          def getXID (self): @@ -54,25 +71,56 @@ class NFSRequest:          def setReply (self, logline, linecount):                  tokens = logline.strip ().split (" ") +                timestamp = tokens[0] + " " + tokens[1]                  statidx = tokens.index ("NFS:") -                self.replydata = " ".join (tokens [statidx+1:]) +                self.replydata = " TimeStamp: " + timestamp + " " + " ".join (tokens [statidx+1:])                  self.replylinecount = linecount +                if "gfid" in tokens: +                        gfididx = tokens.index ("gfid") +                        self.replygfid = tokens [gfididx + 1].strip(",")          def dump (self): -                print "ReqLine: " + str(self.calllinecount) + " XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata +                print "ReqLine: " + str(self.calllinecount) + " TimeStamp: " + self.timestamp + ", XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata  class NFSLogAnalyzer: -        def __init__ (self): +        def __init__ (self, optn, trackfilename, tracknamefh, stats): +                self.stats = stats                  self.xid_request_map = {}                  self.orphan_replies = {} +                self.rqlist = []                  self.CALL = 1                  self.REPLY = 2 +                self.optn = optn +                self.trackfilename = trackfilename +                self.tracknamefh = tracknamefh +                self.trackedfilehandles = []          def handle_call_line (self, logline, linecount):                  newreq = NFSRequest (logline, linecount)                  xid = newreq.getXID () -                self.xid_request_map [xid] = newreq +                if (self.optn == SYNTHESIZE): +                        self.xid_request_map [xid] = newreq +                        self.rqlist.append(newreq) +                elif self.optn == TRACKFILENAME: +                        if newreq.requestIsEntryOp(): +                                if newreq.entryname == self.trackfilename: +                                        self.xid_request_map [xid] = newreq +                                        self.rqlist.append(newreq) +                                else: +                                        del newreq +                        elif self.tracknamefh == ENABLE_TRACKNAME_FH: +                                if len (self.trackedfilehandles) > 0: +                                        if newreq.gfid in self.trackedfilehandles: +                                                self.xid_request_map [xid] = newreq +                                                self.rqlist.append(newreq) +                                        else: +                                                del newreq +                                else: +                                        del newreq +                        else: +                                del newreq +          def handle_reply_line (self, logline, linecount):                  tokens = logline.strip ().split (" ") @@ -82,7 +130,10 @@ class NFSLogAnalyzer:                  if xid not in self.xid_request_map.keys ():                          self.orphan_replies [xid] = logline                  else: -                        self.xid_request_map [xid].setReply (logline, linecount) +                        rq = self.xid_request_map [xid] +                        rq.setReply (logline, linecount) +                        if rq.requestIsEntryOp() and rq.entryname == self.trackfilename: +                                self.trackedfilehandles.append (rq.replygfid)          def analyzeLine (self, logline, linecount):                  tokens = logline.strip ().split (" ") @@ -102,43 +153,116 @@ class NFSLogAnalyzer:                          self.handle_reply_line (logline, linecount)          def getStats (self): +                if self.stats == 0: +                        return                  rcount = len (self.xid_request_map.keys ())                  orphancount = len (self.orphan_replies.keys ())                  print "Requests: " + str(rcount) + ", Orphans: " + str(orphancount)          def dump (self):                  self.getStats () -                for rq in self.xid_request_map.values (): +                for rq in self.rqlist:                          rq.dump () +                        del rq - +                self.rqlist = [] +                self.orphan_replies = {} +                self.xid_request_map = {}  linecount = 0 -la = NFSLogAnalyzer () + +SYNTHESIZE = 1 +TRACKFILENAME = 2 + +ENABLESTATS = 1 +DISABLESTATS = 0 + +ENABLE_TRACKNAME_FH = 1 +DISABLE_TRACKNAME_FH = 0  progmsgcount = 1000 -dumpinterval = 2000000 +dumpinterval = 200000 +operation = SYNTHESIZE +stats = ENABLESTATS +tracknamefh = DISABLE_TRACKNAME_FH +trackfilename = "" + +""" +Print the progress of the analysing operations every X number of lines read from +the logs, where X is the argument provided to this option. +Use this to print a status message every say 10000 lines processed or 100000 +lines processed to know how much longer the processing will go on for. + + +USAGE: --progress <NUMLINES> +"""  if "--progress" in sys.argv:          idx = sys.argv.index ("--progress")          progmsgcount = int(sys.argv[idx+1]) - +""" +The replies for a NFS request can be separated by hundreds and even thousands +of other NFS requests and replies. These can be spread over many hundreds and +thousands of log lines. This script maintains a memory dict to map each request +to its reply using the XID. Because this is in-core, there is a limit to the +number of entries in the dict. At regular intervals, it dumps the mapped +requests and the replies into the stdout. The requests whose replies were not +found at the point of dumping are left as orphans, i.e. without info about the +replies. Use this option to tune the number of lines to maximize the number of +requests whose replies are found while balancing the dict size with memory +on the machine. The default works fine for most cases. + +USAGE: --dump <NUMLINES> +"""  if "--dump" in sys.argv:          idx = sys.argv.index ("--dump")          dumpinterval = int(sys.argv[idx+1]) +""" +The default operation of the script is to output all the requests mapped to +their replies in a single line. This operation mode can be changed by this +argument. It is used to print only those operations that were performed on the +filename given as the argument to this option. Only those entry operations are +printed which contain this filename. + +USAGE: --trackfilename <filename> +""" +if "--trackfilename" in sys.argv: +        idx = sys.argv.index ("--trackfilename") +        trackfilename = sys.argv[idx + 1] +        operation = TRACKFILENAME + +""" +At every dump interval, some stats are printed about the dumped lines. +Use this option to disable printing that to avoid cluttering the +output. +""" +if "--nostats" in sys.argv: +        stats = DISABLESTATS + +""" +While tracking a file using --trackfilename, we're only given those +operations which contain the filename. This excludes a large number +of operations which operate on that file using its filehandle instead of +the filename. This option enables outputting those operations also. It +tracks every single file handle that was ever seen in the log for a given +filename. + +USAGE: --trackfilename +""" +if "--tracknamefh" in sys.argv: +        tracknamefh = ENABLE_TRACKNAME_FH + +la = NFSLogAnalyzer (operation, trackfilename, tracknamefh, stats) +  for line in sys.stdin:          linecount = linecount + 1          if linecount % dumpinterval == 0:                  sys.stderr.write ("Dumping data..\n")                  la.dump () -                del la -                la = NFSLogAnalyzer ()          if linecount % progmsgcount == 0:                  sys.stderr.write ("Integrating line: "+ str(linecount) + "\n")          la.analyzeLine (line, linecount) - -la.dump () | 
