2 # -*- coding: utf-8 -*-
5 from csv import reader, Sniffer, writer, QUOTE_MINIMAL
6 from os.path import exists
7 from argparse import ArgumentParser
def principal_symbols(f_in, feat, sortby):
    """Estimate the number of principal symbols of a feature per address.

    Reads the CSV file ``f_in``, groups its rows by the value found in the
    ``sortby`` column and, inside every group, counts how often each
    distinct value of the ``feat`` column occurs.  A CSV table is then
    written to standard output with one row per group holding: the group
    key, the number of packets (rows), the number of states (distinct
    feature values) and the estimate ``sum(c_i ** 2) / max(c_i) ** 2``
    computed over the per-value counts ``c_i``.

    Args:
        f_in: Path of the input CSV file; it must begin with a header row.
        feat: Header name of the column whose values are counted.
        sortby: Header name of the column used to group the rows.

    Raises:
        SystemExit: With status 1 when no header is detected or when one
            of the requested columns is missing from the header.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import sys`` being present.
    import sys

    with open(f_in, 'r') as f:
        # Make sure that CSV file includes a header.
        if not Sniffer().has_header(f.read(8192)):
            sys.stderr.write("ERROR: CSV file has no header!\n")
            sys.exit(1)
        # The sniffer consumed the beginning of the file; rewind so that
        # the reader starts at the header again.
        f.seek(0)
        rows = reader(f, delimiter=',', quotechar='"')

        # Parse header of the input file
        header = next(rows)
        try:
            sort_field = header.index(sortby)
        except ValueError:
            sys.stderr.write("ERROR: Cannot find column '%s' in " % sortby +
                             "CSV file!\n")
            sys.exit(1)
        try:
            feat_field = header.index(feat)
        except ValueError:
            sys.stderr.write("ERROR: Cannot find column '%s' " % str(feat) +
                             "in CSV file!\n")
            sys.stderr.write("These are the values that were found: " +
                             "%s\n" % ", ".join(header))
            sys.exit(1)

        # Parse input file and build datastructure:
        # {group key: {feature value: number of occurrences}}
        last_needed = max(sort_field, feat_field)
        pkts = {}
        for row in rows:
            # Blank or truncated lines cannot be indexed; skip them instead
            # of aborting with an IndexError.
            if len(row) <= last_needed:
                continue
            counts = pkts.setdefault(row[sort_field], {})
            counts[row[feat_field]] = counts.get(row[feat_field], 0) + 1

    # Calculate multimodality estimation and write to output.  Writing to
    # sys.stdout (rather than opening "/dev/stdout") is portable, leaves no
    # unclosed file handle behind and works on Python 2 and Python 3.
    csvwriter = writer(sys.stdout, delimiter=',', quotechar='"',
                       quoting=QUOTE_MINIMAL)
    csvwriter.writerow([sortby, 'Number of packets',
                        'Number of states', 'Principal symbols'])

    for src, counts in pkts.items():
        occurrences = list(counts.values())
        squared_sum = sum(count * count for count in occurrences)
        multimod = float(squared_sum) / max(occurrences) ** 2
        csvwriter.writerow([src, sum(occurrences), len(counts), multimod])
def main():
    """Parse the command line and run the principal-symbol estimation.

    Command-line options:
        --input: Path of the input CSV file (required).
        --feature: Name of the column to aggregate (required).
        --sort-by: 'Source' (default) or 'Destination'.

    Raises:
        SystemExit: With status 1 when the input file does not exist;
            argparse also exits on invalid or missing arguments.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import sys`` being present.
    import sys

    # The example must be copy-pastable, so it contains no stray
    # backslash between the option and its value.
    parser = ArgumentParser(description="Estimate principal symbols.",
                            epilog='Example: %s --input ' % sys.argv[0] +
                                   'csv_file.csv --feature "Time to live" ' +
                                   '--sort-by "Source"')
    parser.add_argument("--input", type=str, required=True,
                        help="Input CSV file.")
    parser.add_argument("--feature", type=str, required=True,
                        help="Feature to aggregate.")
    parser.add_argument("--sort-by", type=str, default='Source',
                        choices=['Source', 'Destination'], required=False,
                        help="Sort by source or destination IP address.")
    args = parser.parse_args()

    # Fail early with a readable message instead of a traceback.
    if not exists(args.input):
        sys.stderr.write("ERROR: Input file '%s' " % args.input +
                         "does not exist!\n")
        sys.exit(1)

    principal_symbols(args.input, args.feature, args.sort_by)
83 if __name__ == '__main__':