############################################################
#
# Guanhua Yan (ghyan@binghamton.edu)
#
# Extract features from binary executable programs.
#
############################################################

# gets all the imported DLLs
#import pefile
import sys
import os
import shutil
import time
import commands
import subprocess

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Function definitions. Top level code at the bottom.
#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#---------- Processing Hexdump output

def procHexdump(dname):
    """Write byte 1-gram and 2-gram frequency files for `dname`.

    The file is dumped with /usr/bin/hexdump into `<dname>.hexdump`.  The
    first column of every dump line (the offset) is dropped, the remaining
    alphanumeric characters are concatenated, and every two characters are
    treated as one byte.  For n = 1 and n = 2 the relative frequency of each
    n-byte window is written to `<dname>.hexdump.<n>-gram`: a `#total = `
    header line followed by one `<hex digits> <tab><frequency>` line per
    distinct window.

    Args:
        dname: path of the file to analyse.

    Returns:
        List with the two n-gram file paths (the raw dump is not listed).

    NOTE(review): plain `hexdump` (no format options) is understood to print
    16-bit words in host byte order and to collapse repeated lines into `*`,
    so the "bytes" seen here may be pair-swapped and runs of identical lines
    dropped -- confirm against the hexdump manual before relying on exact
    byte order.  The parsing is left as it was so existing feature files
    stay comparable.
    """
    hexdumpout = dname + '.hexdump'
    # Logged in the historical shell-like form; the command itself is run
    # below without a shell.
    hexdumpcmd = '/usr/bin/hexdump ' + dname + ' > ' + hexdumpout
    print("Running hexdump command ... " + hexdumpcmd)

    # An argument list (instead of a shell string) keeps paths containing
    # spaces or shell metacharacters from breaking or hijacking the command.
    with open(hexdumpout, 'w') as sink:
        try:
            subprocess.call(['/usr/bin/hexdump', dname], stdout=sink)
        except OSError as err:
            # Stay best-effort like the shell version: an unusable hexdump
            # leaves an empty dump and therefore empty feature files.
            print("Could not run hexdump: %s" % err)

    digit_runs = []
    with open(hexdumpout, 'r') as dump:
        for rawline in dump:
            # Everything after the first space is payload; the offset column
            # (and a lone "*" line) contributes nothing.
            payload = rawline.strip('\n\r').partition(' ')[2]
            digit_runs.append("".join(ch for ch in payload if ch.isalnum()))

    mybuffer = "".join(digit_runs)
    # every two characters are a byte
    nbytes = len(mybuffer) // 2

    myresultlist = []
    # only 1-grams and 2-grams are produced
    for n in (1, 2):
        counts = {}
        for k in range(nbytes - n + 1):
            gram = mybuffer[2 * k: 2 * (k + n)]
            counts[gram] = counts.get(gram, 0) + 1

        total = float(sum(counts.values()))
        outputfile = hexdumpout + '.' + str(n) + '-gram'
        with open(outputfile, 'w') as output:
            output.write("#total =  %s\n" % total)
            for gram in counts:
                output.write("%s \t%s\n" % (gram, format(counts[gram] / total, '.10f')))
        myresultlist.append(outputfile)

    return myresultlist

#---------- Processing Objdump output

def procObjdump_ver0(dname):
    """Write opcode 1-gram and 2-gram frequency files from `objdump -d`.

    The disassembly is stored in `<dname>.objdump`.  A line whose first word
    is "Disassembly" sets the label to look for (`<section>:` wrapped in
    angle brackets); once a line ending in that label has been seen, every
    later line with exactly three tab-separated columns contributes the
    first space-separated word of its third column as an opcode.  Opcode
    n-grams (joined with '.') are written with their relative frequencies to
    `<dname>.objdump.operator.<n>` for n = 1 and n = 2.

    Args:
        dname: path of the binary to disassemble.

    Returns:
        List with the two `.operator.<n>` file paths.
    """
    objdumpout = dname + '.objdump'
    # Logged in the historical shell-like form; executed without a shell.
    objdumpcmd = '/usr/bin/objdump -d ' + dname + ' > ' + objdumpout
    print("Running objdump command ... " + objdumpcmd)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters can no longer break or hijack the command.
    with open(objdumpout, 'w') as sink:
        try:
            subprocess.call(['/usr/bin/objdump', '-d', dname], stdout=sink)
        except OSError as err:
            # Best-effort, as before: no objdump means an empty listing.
            print("Could not run objdump: %s" % err)

    opcodes = []
    searchkey = ""
    in_section = False
    with open(objdumpout, 'r') as dump:
        for rawline in dump:
            line = rawline.strip('\n\r')
            words = line.split(' ')
            if words[0] == "Disassembly":
                # e.g. "... section .text:" -> look for "<.text>:"
                searchkey = "<" + words[-1].strip(':') + ">:"
            elif searchkey != "" and words[-1] == searchkey:
                # the section label is not always "<.text>:"; once found,
                # the flag stays set for the rest of the listing
                in_section = True
            elif in_section:
                columns = line.split('\t')
                if len(columns) == 3:
                    opcodes.append(columns[-1].split(' ')[0])

    myresultlist = []
    for n in (1, 2):
        counts = {}
        for k in range(len(opcodes) - n + 1):
            gram = ".".join(opcodes[k:k + n])
            counts[gram] = counts.get(gram, 0) + 1

        total = float(sum(counts.values()))
        outputfile = objdumpout + '.operator.' + str(n)
        with open(outputfile, 'w') as output:
            output.write("#total =  %s\n" % total)
            for gram in counts:
                output.write("%s \t%s\n" % (gram, format(counts[gram] / total, '.10f')))
        myresultlist.append(outputfile)

    return myresultlist

################## New objdump ####################

def procObjdump_ver1(dname):
    """Write call-target counts and opcode 1-gram frequencies from objdump.

    The disassembly (`objdump -d`) is stored in `<dname>.objdump`.  Every
    line with more than two tab-separated columns is treated as an
    instruction: the first space-separated word of the third column is the
    opcode, and when that opcode contains "call" the last word of the column
    is counted as a call target.

    Files written:
        `<dname>.objdump.call`        header with the number of distinct
                                      targets, then `<target> <tab><count>`
        `<dname>.objdump.operator.1`  header with the opcode total, then
                                      `<opcode> <tab><relative frequency>`

    Args:
        dname: path of the binary to disassemble.

    Returns:
        List with the `.call` path followed by the `.operator.1` path.
    """
    objdumpout = dname + '.objdump'
    # Logged in the historical shell-like form; executed without a shell.
    objdumpcmd = '/usr/bin/objdump -d ' + dname + ' > ' + objdumpout
    print("Running objdump command ... " + objdumpcmd)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters can no longer break or hijack the command.
    with open(objdumpout, 'w') as sink:
        try:
            subprocess.call(['/usr/bin/objdump', '-d', dname], stdout=sink)
        except OSError as err:
            # Best-effort, as before: no objdump means an empty listing.
            print("Could not run objdump: %s" % err)

    opcodes = []
    mycallmap = {}
    with open(objdumpout, 'r') as dump:
        for line in dump:
            fields = line.rstrip().split('\t')
            if len(fields) <= 2:
                continue
            items = fields[2].split(' ')
            operator = items[0]
            opcodes.append(operator)
            if "call" in operator:
                callfunc = items[-1]
                # dict.get avoids scanning the key list on every call site
                mycallmap[callfunc] = mycallmap.get(callfunc, 0) + 1

    myresultlist = []

    # call map: distinct targets and how often each is called
    outputfile = objdumpout + '.call'
    with open(outputfile, 'w') as output:
        output.write("#total =  %s\n" % len(mycallmap))
        for callfunc in mycallmap:
            output.write("%s \t%s\n" % (callfunc, mycallmap[callfunc]))
    myresultlist.append(outputfile)

    # opcode frequencies; this version only ever produced 1-grams
    counts = {}
    for opcode in opcodes:
        counts[opcode] = counts.get(opcode, 0) + 1
    total = float(sum(counts.values()))
    outputfile = objdumpout + '.operator.1'
    with open(outputfile, 'w') as output:
        output.write("#total =  %s\n" % total)
        for opcode in counts:
            output.write("%s \t%s\n" % (opcode, format(counts[opcode] / total, '.10f')))
    myresultlist.append(outputfile)

    return myresultlist

################## From objdump. but with operand types. ####################

def demangle(names):
    """Demangle C++ symbol names by passing them to `c++filt`.

    Args:
        names: iterable of symbol-name strings; each is passed to c++filt as
            one command-line argument.

    Returns:
        List of strings, one per input name and in the same order, holding
        what c++filt printed for that name.

    Raises:
        OSError: if c++filt cannot be started.
        AssertionError: if c++filt did not print exactly one line per name.
    """
    names = list(names)
    if not names:
        # Nothing to translate; do not start a process for an empty request.
        return []

    # universal_newlines makes the pipe deliver text rather than bytes, so
    # the split below works regardless of interpreter version.
    pipe = subprocess.Popen(['c++filt'] + names,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = pipe.communicate()
    demangled = stdout.split("\n")

    # Each line ends with a newline, so the final entry of the split output
    # will always be ''.  Checked explicitly (not with `assert`) so the
    # check survives `python -O`.
    if len(demangled) != len(names) + 1:
        raise AssertionError("c++filt returned %d lines for %d names"
                             % (len(demangled) - 1, len(names)))
    return demangled[:-1]

def find_next_block(mybuffer, start):
    """Locate the next run of non-None entries at or after `start`.

    Leading None entries are skipped; the run then extends until the next
    None or the end of the buffer.  The bounds are also printed.

    Returns:
        Tuple (first, last) of inclusive indices.  When no run exists,
        `last` is `first - 1` (and `first` is the buffer length or `start`).
    """
    size = len(mybuffer)

    first = start
    while first < size and mybuffer[first] is None:
        first += 1

    stop = first
    while stop < size and mybuffer[stop] is not None:
        stop += 1

    last = stop - 1
    print("find_next_block %s %s" % (first, last))
    return (first, last)
                
def sliding_count(mybuffer, ngram):
    """Count n-grams inside each None-delimited block of `mybuffer`.

    Blocks are located with find_next_block(); windows never span a None
    separator, and a block shorter than `ngram` contributes nothing.

    Args:
        mybuffer: list of token strings with None entries as separators.
        ngram: window length.

    Returns:
        Dict mapping each window, with its tokens joined by '|', to the
        number of times it occurs.
    """
    ngram_map = {}
    start = 0
    while True:
        (start, end) = find_next_block(mybuffer, start)
        if end < start:
            # no further non-None run (this also covers start == len(mybuffer))
            break

        for i in range(end - start - ngram + 2):
            if i % 100 == 0:
                print("i = %s %s %s" % (i, start, end))

            key = '|'.join(mybuffer[start + i:start + i + ngram])
            # dict.get keeps each update O(1); scanning .keys() made the
            # whole count quadratic in the number of distinct n-grams
            ngram_map[key] = ngram_map.get(key, 0) + 1

        # move on to the next block
        start = end + 1

    return ngram_map
                        
def procObjdump(dname):
	"""Disassemble `dname` and write opcode/operand-type feature files.

	Runs `objdump -Mintel -dr` piped through `c++filt` into
	`<dname>.objdump` and scans that listing line by line, splitting each
	line on tabs:

	  * fewer than two fields: None is appended to the token buffer, which
	    sliding_count() later treats as a block separator;
	  * exactly two fields: the line is ignored;
	  * otherwise the third field is parsed as an instruction.  Its leading
	    words that start with a letter are joined with '.' to form the
	    operator, and a suffix is appended per recognised operand of the
	    first remaining word ('-i' immediate, '-r' register, '-m' memory).

	Returns a list of the files produced: always the objdump listing; and,
	unless an instruction whose first word contains '(bad)' was met,
	also `.external` (demangled names found between '<' and '>'), `.call`
	(count per call target) and `.operator.1` .. `.operator.4` (n-gram
	counts from sliding_count()).

	NOTE(review): the command is a concatenated shell string run through
	os.system, so a `dname` containing spaces or shell metacharacters will
	break (or alter) the command.
	NOTE(review): '$' and '%' operand prefixes look like AT&T-syntax
	markers while the command asks for Intel syntax (-Mintel) -- confirm
	the operand classification below still does what is intended.
	"""
	myresultlist = []
	objdumpout = dname + '.objdump'
	#objdumpcmd = '/usr/bin/objdump -d ' + dname + ' > ' + objdumpout
        #objdumpcmd = '/usr/bin/objdump -Mintel -d ' + dname + ' > ' + objdumpout

        # advice from http://stackoverflow.com/questions/3006438/is-there-an-online-name-demangler-for-c
        objdumpcmd = '/usr/bin/objdump -Mintel -dr ' + dname + ' | c++filt > ' + objdumpout
	print "Running objdump command ...", objdumpcmd
	os.system(objdumpcmd)

	myfile = open(objdumpout, 'r')
        # mybuffer: operator tokens in listing order, None between blocks
        mybuffer = []
        # mycallmap: last word of a call instruction -> occurrence count
        mycallmap = {}
        # set when a '(bad)' instruction aborts the scan
        bad_instructions = False
        # demangled names taken from '<...>' annotations
        myexternal_set = set([])
        
        for line in myfile:
                fields = line.rstrip().split('\t')
                if len(fields) < 2:
                        # no tab on this line: record a block separator
                        mybuffer.append(None)
                        continue
                if len(fields) == 2:
                        # two columns only: skipped without adding a separator
                        continue
                print "fields =", fields
                items = fields[2].split(' ')
                print "items =", items
                if len(items) > 0:
                        # it's possible the opcode has multiple words (repetitive)
                        localbuffer = []
                        if items[0].find('(bad)') != -1:
                                # stop scanning; only the raw listing is returned
                                print "trouble with", dname
                                print "not an opcode!", items[0]
                                bad_instructions = True
                                break
                                
                        # consume every leading word that starts with a letter
                        while len(items) > 0 and len(items[0]) > 0 and items[0][0].isalpha():
                                localbuffer.append(items[0])
                                items.pop(0)
                        operator = ".".join(localbuffer)
                        print "operator =", operator
                        
                        # skip empty items
                        while len(items) > 0 and len(items[0]) == 0:
                                items.pop(0)                        

                        # last word carries a '<name>' annotation and no word is exactly '#'
                        if len(items) > 0 and items[-1].find('<') != -1 and not '#' in items:
                                opd = items[-1].split('<')[1]
                                print "opd =", opd
                                # demangle() starts one c++filt process per call
                                names = demangle( [opd.split('>')[0]] )
                                myexternal_set.add(names[0])
                                
                        if len(items) > 0 and len(items[0]) > 0:
                                # only the first remaining word is split into operands
                                operands = items[0].split(',')
                                print "operands =", operands
                                for opd in operands:
                                        # NOTE(review): opd[0] assumes a non-empty operand;
                                        # a trailing ',' would yield '' here -- confirm it cannot occur
                                        if opd[0] == '$':
                                                operator = operator + '-i' # immediate
                                        elif opd[0] == '%':
                                                operator = operator + '-r' # register
                                        elif len(opd) > 1 and opd[0] == '0' and opd[1] == 'x':
                                                if opd.find('(') != -1:
                                                        operator = operator + '-m' # memory
                                                else:
                                                        operator = operator + '-i' # immediate number
                                        elif len(opd) > 2 and opd[0] == '-' and opd[1] == '0' and opd[2] == 'x':
                                                if opd.find('(') != -1:
                                                        operator = operator + '-m' # memory
                                                else:
                                                        operator = operator + '-i' # immediate number
                                        elif opd[0] == '<':
                                                break # break out of the "for opd" loop
                                        elif opd[0] == '#':
                                                break
                                        elif opd[0] >= '0' and opd[0] <= '9':
                                                operator = operator + '-i' # immediate number
                                        elif len(opd) > 1 and opd[0] == '-' and opd[1] >= '0' and opd[1] <= '9':
                                                operator = operator + '-i' # immediate number
                                        elif opd[0] == '*':
                                                operator = operator + '-m' # memory
                                        elif opd[0] == '(':
                                                operator = operator + '-m' # memory
                                        elif opd[0] >= 'a' and opd[0] <= 'f':
                                                # immediate only if every character is in 'a'..'f'
                                                x = False
                                                for c in opd:
                                                        if c < 'a' or c > 'f':
                                                                x = True
                                                                break
                                                if x:
                                                        print "Unconsidered type!!!"
                                                else:
                                                        operator = operator + '-i' # immediate number
                                        else:
                                                print "Unconsidered type!!!"
                                                
                        mybuffer.append(operator)
                                
                        # check if this operator is an unconditional jmp. if so, also break
                        # if operator.find('jmp') == 0:
                        #        mybuffer.append(None)
                        
                        # operator begins with "call": count its last word as the target
                        if operator.find("call") == 0:
                                #print "items =", items
                                callfunc = items[-1]
                                if callfunc in mycallmap.keys():
                                        mycallmap[ callfunc ] += 1
                                else:
                                        mycallmap[ callfunc ] = 1
                        
        myfile.close()

        myresultlist.append(objdumpout)
        if bad_instructions:
                return myresultlist

        # one demangled external name per line
        outputfile = objdumpout + '.external'
        output = open(outputfile, 'w')
        for name in myexternal_set:
                print >> output, name
        output.close()
        myresultlist.append(outputfile)
        
        # print out the callmap
        outputfile = objdumpout + '.call'
        output = open(outputfile, 'w')
        print >> output, "#total = ", len(mycallmap.keys())
        for key in mycallmap.keys():
                print >> output, key, '\t', mycallmap[key]
        output.close()
        myresultlist.append(outputfile)

        # The string literal below is disabled code kept as a bare expression;
        # it has no effect at run time.
        """
	mynum = len(mybuffer)

        outputfile = objdumpout + '.buffer'
        output = open(outputfile, 'w+')

        localbuff = []
        prev = None
        for i in range(len(mybuffer)):
                x = mybuffer[i]
                if x == None and prev == None:
                        continue
                if x == None:
                        if len(localbuff) >= 10:
                                for y in localbuff:
                                        print >> output, y,
                                print >> output, ','
                        localbuff = []
                else:
                        localbuff.append(x)
                prev = x

        print >> output, '\n'
        output.close()
        myresultlist.append(outputfile)
        """

	# print the map. consider 4-grams
	# (n runs from 1 to 4; counts are raw occurrences, not frequencies)
	for k in range(4):
		n = k + 1
                ngram_map = sliding_count(mybuffer, n)

		outputfile = objdumpout + '.operator.' + str(n)
		output = open(outputfile, 'w+')

		for key in ngram_map.keys():
			print >> output, key, '\t', ngram_map[key]
		output.close()
		myresultlist.append(outputfile)

        #print dname, "myexternal_set =", myexternal_set
        #sys.exit()        
	return myresultlist

#---------- Processing readelf output

def procRelocs(dname):
    """Extract the distinct relocation symbols of `dname` via `readelf -rW`.

    The readelf output is stored in `<dname>.readelf_rW`.  For every line
    with at least six space-separated fields that does not start with
    'Relocation', 'Offset' or a space, the fifth non-empty field is taken as
    `symbol@version`.  The symbol is demangled with demangle(), cut at the
    first '(' and then at the first '<', and re-joined with the version
    (empty when there is none) as `name@version`.  Each distinct key is
    written once to `<dname>.readelf_rW.relocs` as `<key> <tab>1`.

    Args:
        dname: path of the ELF file to inspect.

    Returns:
        List with the raw readelf output path followed by the `.relocs` path.
    """
    readelf_out = dname + '.readelf_rW'
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters can no longer break or hijack the command.
    with open(readelf_out, 'w') as sink:
        try:
            subprocess.call(['readelf', '-rW', dname], stdout=sink)
        except OSError as err:
            # Best-effort, as before: no readelf means an empty listing.
            print("Could not run readelf: %s" % err)

    outputfile = readelf_out + '.relocs'
    key_set = set()

    with open(readelf_out, 'r') as readelf_fn:
        with open(outputfile, 'w') as relocs_fn:
            for line in readelf_fn:
                fields = line.rstrip().split(' ')
                print("fields = %s" % (fields,))
                if len(fields) < 6 or fields[0] in ('Relocation', '', 'Offset'):
                    continue

                cnt = 0
                for x in fields:
                    if not x:
                        # runs of spaces produce empty fields; they do not count
                        continue
                    cnt += 1
                    print("x = " + x)
                    if cnt != 5:
                        continue

                    # fifth non-empty column: "symbol" or "symbol@version"
                    items = x.split('@')
                    print("items = %s" % (items,))
                    demangled = demangle([items[0]])[0]
                    # drop the argument list, then any template arguments
                    basename = demangled.split('(')[0].split('<')[0]
                    version = items[1] if len(items) > 1 else ""
                    key = basename + "@" + version

                    if key not in key_set:
                        relocs_fn.write("%s \t1\n" % key)
                        key_set.add(key)
                    break

    return [readelf_out, outputfile]
        
#---------- Processing pefile output

def procPEHeader(dname):
	"""Flatten the pefile dump of `dname` into `feature value` lines.

	The text returned by pefile's dump_info() is written to
	`<dname>.peheader`, read back line by line, and converted into features
	written to `<dname>.peheader.keyvalue`.  A line of the form `[NAME]`
	selects the current category; what is extracted from the following
	lines depends on that category (header fields, section flags/entropy,
	import descriptors and DLL names, resource sizes, relocation block
	sizes).  Two averages are appended at the end: the unnamed-resource
	size and the base-relocation block size.

	Returns a list holding only the `.keyvalue` path.

	NOTE(review): `import pefile` is commented out at the top of this file,
	so `pefile.PE` raises NameError unless the module is imported some
	other way.
	"""
	myresultlist = []
	pe = pefile.PE(dname)
	dumpoutput = pe.dump_info()
	#print '+++++++ begin to dump for ', dname
	#print dumpoutput
	peoutput = dname+'.peheader'
	pefileout = open(peoutput, "w")
	pefileout.write(dumpoutput)
	pefileout.close()

	pefileresultfile = peoutput+'.keyvalue'
	myresultlist.append(pefileresultfile)
	pefileresult = open(pefileresultfile, "w")

	# read pe dump file
	# category: name from the most recent "[...]" line ("" when none is active)
	category = ""
	# fields of the import descriptor seen since the last "[...]" line
	image_import_map = {}
	# 1 while inside a resource entry whose Id:/Name: line had three fields
	top_resource = 0
	resource_name = ""
	# running size/count for the current resource entry
	total_resource_cnt = 0
	total_resource_size = 0
	# accumulated over entries whose resource_name is "-"
	unnamed_resource_cnt = 0
	unnamed_resource_size = 0
	# accumulated over all SizeOfBlock: lines
	total_reloc_cnt = 0
	total_reloc_size = 0
	pefileout = open(peoutput, "r")
	for myline in pefileout.readlines():
		myline2 = myline.strip('\n\r')
		# whitespace-separated view and '.'-separated view of the same line
		myfields = myline2.split()		
		myfields2 = myline2.split('.')		
		#print myline2, myfields2

		if len(myline2) == 0:
			# a blank line ends the current category, except for the two
			# categories that span several blank-separated entries
			if category != "IMAGE_RESOURCE_DIRECTORY" and category != "IMAGE_BASE_RELOCATION":
				category = ""
			else:
				# basically keep the same category because there may be multiple resources
				pass
			if len(resource_name) > 0:
				if resource_name == "-":
					# NOTE(review): resource_name is not cleared in this branch, so
					# every later blank line adds the same totals again -- confirm
					unnamed_resource_cnt += total_resource_cnt
					unnamed_resource_size += total_resource_size
				else:
					# average size of the entries of this named resource
					myfeature = resource_name+"__SIZE"
					if (total_resource_cnt == 0):
						#print myline2
						#print "trouble with "+resource_name
						myvalue = 0
					else:
						myvalue = total_resource_size / total_resource_cnt
					resource_name = ""
					print >> pefileresult, myfeature, myvalue
			continue
		if len(myline2) > 2 and myline2[0] == '[' and myline2[-1] == ']' :
			# "[CATEGORY]" header: start a new category and feature prefix
			#print myline2
			category = myline2.strip('[]')
			prefix = category
			image_import_map = {}
			#print category
		elif (category == "IMAGE_DOS_HEADER" or
		      category == "IMAGE_NT_HEADERS" or
		      category == "IMAGE_FILE_HEADER" or
		      category == "IMAGE_OPTIONAL_HEADER" or 
		      category == "IMAGE_SECTION_HEADER" or 
		      category.find("IMAGE_DIRECTORY_ENTRY") != -1 ) :
			if len(myline2) >= 2 and myline2[0] == '0' and myline2[1] == 'x':
			        #print myfields
				# lines starting with "0x": third field is the key, fourth the value
				if len(myfields) >= 4:
					if (category == "IMAGE_SECTION_HEADER") :
						if (myfields[2] == 'Name:'):
							# the section name becomes part of the prefix
							# for the rest of this section header
							prefix = prefix + '__' + myfields[3].strip('.')
							continue
					curkey = myfields[2].strip(':')
					myfeature = prefix + '__' + curkey
					myvalue = myfields[3]
					print >> pefileresult, myfeature, myvalue
					#if category == "IMAGE_FILE_HEADER" and myfields[2] == 'Machine:':
					#	if myfields[3] == "0x14C":
					#		myvalue = "I386"
					#	elif myfields[3] == "0x0200":
					#		myvalue = "IA64"
					#	elif myfields[3] == "0x8664":
					#		myvalue = "AMD64"
					#	myfeature = "IMAGE_FILE_HEADER__Machine_Type"
					#	print >> pefileresult, myfeature, myvalue
			# NOTE(review): myfields[0] assumes the line has at least one
			# non-blank token; a whitespace-only line would raise IndexError
			elif myfields[0] == "Flags:":
				# one boolean feature per listed flag
				nitems = len(myfields)
				for k in range(nitems):
					if k == 0: continue;
					myfeature = prefix + '__Flags__'+ myfields[k].strip(',')
					print >> pefileresult, myfeature, "true"
			elif myfields[0] == "Entropy:":
				myfeature = prefix + '__Entropy'
				print >> pefileresult, myfeature, myfields[1]
		elif category == "IMAGE_IMPORT_DESCRIPTOR":
			myfields = myline2.split()		
			if len(myline2) >= 2 and myline2[0] == '0' and myline2[1] == 'x':
				# remember descriptor fields until a DLL line names their owner
				image_import_map[myfields[2].strip(':')] = myfields[3]
				#print myfields[2].strip(':'), "->", myfields[3]
		elif len(myfields2) >= 2 and (myfields2[1] == "DLL" or myfields2[1] == "dll"):
			# second '.'-separated piece is "DLL"/"dll"; first piece is used as the DLL name
			#print myline2
			#print myfields2
			#print len(image_import_map)
			#print image_import_map
			if (len(image_import_map) > 0):
				print >> pefileresult, "IMAGE_IMPORT__"+myfields2[0], "true"
				for key in image_import_map.keys():
					#print "key ", key, image_import_map[key]
					myfeature = "IMAGE_IMPORT__"+myfields2[0]+"__"+key
					myvalue = image_import_map[key]
					print >> pefileresult, myfeature, myvalue
				image_import_map = {}
			# the first whitespace-separated token of the line is emitted as a feature too
			print >> pefileresult, "IMAGE_IMPORT__"+myfields[0], "true"			
		elif category == "IMAGE_RESOURCE_DIRECTORY":
			#print myline2
			if len(myfields) >= 1 and (myfields[0] == "Id:" or myfields[0] == "Name:"):
				#print myline2
				if len(myfields) == 3:
					# three fields: start a new named resource and reset its totals
					resource_name = myfields[2].strip('()')
					#print resource_name
					top_resource = 1
					total_resource_size = 0
					total_resource_cnt = 0
				elif len(myfields) == 2:
					top_resource = 0
			elif len(myline2) >= 2 and myline2[0] == '0' and myline2[1] == 'x':
				# this is the top level feature for resources
				myfeature = category+"__TOP__"+myfields[2].strip(':')
				myvalue = myfields[3]
				print >> pefileresult, myfeature, myvalue
			elif resource_name != "-" and top_resource == 1 and myfields[0][0] == '0' and myfields[0][1] == 'x':
				# "0x" line (after leading whitespace) right under a named resource
				myfeature = resource_name + "__"+myfields[2].strip(':')
				myvalue = myfields[3]
				print >> pefileresult, myfeature, myvalue
			elif len(myfields) >= 3 and len(myfields[0]) >= 2 and myfields[0][0] == '0' and myfields[0][1] == 'x' and myfields[2] == "Size:":
				# data entry size (hexadecimal) counted towards the current resource
				#print myfields[3], int(myfields[3], 16)
				total_resource_size = total_resource_size + int(myfields[3], 16)
				total_resource_cnt += 1
		elif category == "IMAGE_BASE_RELOCATION":
			if len(myfields) >= 4 and myfields[2] == "SizeOfBlock:":
				total_reloc_size += int(myfields[3], 16)
				total_reloc_cnt += 1

	# average size over the unnamed ("-") resources, 0 when there are none
	if unnamed_resource_cnt == 0 or unnamed_resource_size == 0:
		print >> pefileresult, "IMAGE_RESOURCE__UNNAMED_SIZE", 0
	else:
		print >> pefileresult, "IMAGE_RESOURCE__UNNAMED_SIZE", unnamed_resource_size / unnamed_resource_cnt	

	# average relocation block size; the feature name is spelled "REOLOCATION"
	# in the emitted output
	if total_reloc_cnt == 0 or total_reloc_size == 0:
                print >> pefileresult, "IMAGE_BASE_REOLOCATION__SIZE", 0
	else:
                print >> pefileresult, "IMAGE_BASE_REOLOCATION__SIZE", total_reloc_size / total_reloc_cnt

	pefileout.close()
	pefileresult.close()
	return myresultlist

#---------- Processing pin trace

def procPinTrace(dname):
    """Summarise a pin trace into operator n-gram and call-count files.

    The trace path is `dname` with every ".exe" replaced by ".pin.trace";
    when that file does not exist an empty list is returned.  Only trace
    lines whose first character is 'm', 'n' or 'i' are used:

      * 'm' / 'n' lines are split on spaces and the second word is recorded
        as an operator (lines without a second word are skipped);
      * 'i' lines are split on ':' and the second piece is counted as a
        call (lines without a ':' are skipped).

    Operator n-grams (joined with '.') and their relative frequencies are
    written to `<trace>.operator.<n>` for n = 1 and n = 2, and the call
    counts to `<trace>.call`.

    Args:
        dname: path of the executable the trace belongs to.

    Returns:
        List with the two `.operator.<n>` paths followed by the `.call`
        path, or an empty list when there is no trace file.

    NOTE(review): when `dname` contains no ".exe" the trace path equals
    `dname` itself, so the given file is parsed as if it were a trace --
    confirm that is intended.
    """
    myresultlist = []
    pin_trace_file = dname.replace(".exe", ".pin.trace")
    # if the pin file is not there, just ignore it
    if not os.path.exists(pin_trace_file):
        return myresultlist

    operators = []
    mycallmap = {}
    with open(pin_trace_file, 'r') as trace:
        for rawline in trace:
            tag = rawline[0]
            record = rawline.strip('\n\r')
            if tag == 'm' or tag == 'n':
                words = record.split(' ')
                if len(words) > 1:
                    operators.append(words[1])
            elif tag == 'i':
                pieces = record.split(':')
                if len(pieces) > 1:
                    mycallmap[pieces[1]] = mycallmap.get(pieces[1], 0) + 1

    for n in (1, 2):
        counts = {}
        for k in range(len(operators) - n + 1):
            gram = ".".join(operators[k:k + n])
            counts[gram] = counts.get(gram, 0) + 1

        total = float(sum(counts.values()))
        outputfile = pin_trace_file + '.operator.' + str(n)
        with open(outputfile, 'w') as output:
            # the header is written even when no operator was seen
            output.write("#total =  %s\n" % total)
            for gram in counts:
                output.write("%s \t%s\n" % (gram, format(counts[gram] / total, '.10f')))
        myresultlist.append(outputfile)

    # print the call map
    outputfile = pin_trace_file + '.call'
    myresultlist.append(outputfile)
    with open(outputfile, 'w') as output:
        for callee in mycallmap:
            output.write("%s \t%s\n" % (callee, mycallmap[callee]))

    return myresultlist

#---------- obtain the list of dependent libraries.
def procLibs(dname):
    """List the shared libraries `ldd` reports for `dname`.

    The ldd output is stored in `<dname>.ldd`.  Every line of the form
    `<name> => ...` contributes `<name>` (leading tabs removed).  The names
    are written to `<dname>.libs` as a `#total = ` header followed by one
    `<name> <tab>1` line per library.

    Args:
        dname: path of the executable to inspect.

    Returns:
        List holding the `.libs` path.

    NOTE(review): ldd is generally documented as unsafe to run on untrusted
    executables (it may execute code from the target); if the inputs are
    untrusted samples, consider a static alternative such as
    `readelf -d` / `objdump -p` -- confirm with the ldd manual.
    """
    lddout = dname + '.ldd'
    # Logged in the historical shell-like form; executed without a shell.
    lddcmd = '/usr/bin/ldd ' + dname + ' > ' + lddout
    print("Running ldd command ... " + lddcmd)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters can no longer break or hijack the command.
    with open(lddout, 'w') as sink:
        try:
            subprocess.call(['/usr/bin/ldd', dname], stdout=sink)
        except OSError as err:
            # Best-effort, as before: no ldd means an empty listing.
            print("Could not run ldd: %s" % err)

    mybuffer = []
    with open(lddout, 'r') as listing:
        for line in listing:
            fields = line.rstrip().split(' ')
            if len(fields) <= 2:
                continue
            # Only "<name> => <target>" lines name a library; this keeps a
            # message such as "not a dynamic executable" from being
            # recorded as a library called "not".
            if fields[1] != '=>':
                continue
            mybuffer.append(fields[0].lstrip('\t'))
    print(mybuffer)

    outputfile = dname + '.libs'
    with open(outputfile, 'w') as output:
        output.write("#total =  %s\n" % len(mybuffer))
        for lib in mybuffer:
            output.write("%s \t1\n" % lib)

    return [outputfile]

#---------- find executable files in a directory and process them
	
def findExe(dname):
    """Run feature extraction on `dname`, recursing into directories.

    A directory is walked entry by entry.  Any other path is identified
    with the `file` utility; when the description contains "ELF" the
    relocation stage (procRelocs) is run, otherwise the description is
    printed and the file is skipped.  The hexdump, objdump and ldd stages
    (procHexdump, procObjdump, procLibs) exist in this file but are
    currently not invoked here.

    Args:
        dname: file or directory path.

    Returns:
        List of the result files produced for all ELF files found.

    Raises:
        OSError: if `dname` does not exist.
    """
    myresultlist = []
    if os.path.isdir(dname):
        for entry in os.listdir(dname):
            myresultlist += findExe(dname + '/' + entry)
        return myresultlist

    # Fail early with OSError on a missing path, as this function always did.
    os.stat(dname)

    # `file -b` omits the file name from its answer, so a name that merely
    # contains "ELF" is no longer mistaken for an ELF binary.  The argument
    # list (and "--") keeps odd file names from being interpreted by a
    # shell or as options.
    try:
        probe = subprocess.Popen(['file', '-b', '--', dname],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
        cmdoutput = probe.communicate()[0].rstrip('\n')
    except OSError as err:
        # No usable `file` utility: treat the type as unknown and skip.
        cmdoutput = ""
        print("Could not run file: %s" % err)

    if 'ELF' not in cmdoutput:
        print("file  %s --> %s" % (dname, cmdoutput))
        return myresultlist

    try:
        # process readelf relocations
        myresultlist += procRelocs(dname)
    except IOError:
        print("IOError when dealing relocs for " + dname)
    finally:
        # printed whether or not the stage succeeded, as before
        print("Successfully finishing  " + dname)
    return myresultlist

def findZip(dname, testpath, outputpath):
    """Walk `dname`, analyse every file and collect results in `outputpath`.

    Symbolic links are ignored.  For a directory, `outputpath` is created
    if needed and each entry is processed with `outputpath/<entry>` as its
    output location.  For a file, a copy is placed in the scratch directory
    `testpath/<name>/<name>`, findExe() is run on the copy, any result
    files are copied into `outputpath` (created on demand), and the scratch
    directory is removed again.

    Args:
        dname: file or directory to process.
        testpath: existing directory used for temporary per-file copies.
        outputpath: directory (for a directory input) or per-file result
            directory (for a file input) receiving the result files.

    Returns:
        None.
    """
    if os.path.islink(dname):
        return

    if os.path.isdir(dname):
        if not os.path.exists(outputpath):
            os.mkdir(outputpath)
        try:
            entries = os.listdir(dname)
        except OSError:
            # unreadable directory: skip it instead of aborting the walk
            return
        for entry in entries:
            findZip(dname + '/' + entry, testpath, outputpath + '/' + entry)
        return

    try:
        os.stat(dname)
    except OSError:
        return

    myitems = os.path.split(dname)
    myexe = myitems[-1]
    print("dname %s myitems %s myexe %s" % (dname, myitems, myexe))

    testdir = testpath + '/' + myexe
    if not os.path.exists(testdir):
        os.mkdir(testdir)
    testfile = testdir + '/' + myexe
    print("dname %s testfile %s" % (dname, testfile))

    try:
        shutil.copyfile(dname, testfile)
        resultlist = findExe(testfile)
        if len(resultlist) > 0:
            if not os.path.exists(outputpath):
                os.mkdir(outputpath)
            for result in resultlist:
                shutil.copy(result, outputpath)
    finally:
        # Always drop the scratch copy (and the intermediate files created
        # next to it), even when copying or analysis raised.
        if os.path.exists(testdir):
            shutil.rmtree(testdir)

    return

def testfun():
    """Return a fresh two-entry sample dict: 'one' -> 1 and 'two' -> 2."""
    return {'one': 1, 'two': 2}

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# Top level code
#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Usage: <script> <srcpath> <testpath> <outputpath>
#   srcpath    file or directory tree to analyse
#   testpath   existing scratch directory for temporary copies
#   outputpath directory receiving the result files (created if missing)
print(sys.argv)
if len(sys.argv) != 4:
    # The os module has no exit(); the old `os.exit()` call ended the run
    # with an AttributeError traceback.  Exit cleanly with a usage message
    # and a non-zero status instead.
    sys.stderr.write("usage: %s <srcpath> <testpath> <outputpath>\n" % sys.argv[0])
    sys.exit(1)

srcpath = sys.argv[1]
testpath = sys.argv[2]
outputpath = sys.argv[3]

if os.path.exists(srcpath) and os.path.exists(testpath):
    if not os.path.exists(outputpath):
        os.mkdir(outputpath)
    findZip(srcpath, testpath, outputpath)
else:
    # Previously a silent no-op; say why nothing was processed.
    sys.stderr.write("srcpath and testpath must both exist; nothing processed\n")
        

		
