]> git.somenet.org - pub/jan/adbs.git/blob - ex2/mapreduce/CSVSplitter.java
[ex3.1] communication costs.
[pub/jan/adbs.git] / ex2 / mapreduce / CSVSplitter.java
1 // Created as a template for Advanced Database Systems 2019\r
2 \r
3 import java.util.ArrayList;\r
4 \r
/**
 * Splits a single CSV record (one line of text) into its comma-separated fields.
 *
 * <p>Both methods return the raw text of each field: enclosing quotes and doubled quotes are
 * kept exactly as they appear in the input, and nothing is trimmed. The returned array always
 * holds at least one element (an empty input yields one empty field).
 */
public class CSVSplitter {

        /**
         * Splits {@code input} at every comma that is not inside a quoted field.
         *
         * <p>Quote handling:
         * <ul>
         *   <li>a {@code "} that is the first character of a field opens a quoted section;</li>
         *   <li>a {@code "} directly followed by another {@code "} is treated as an escaped
         *       quote and both characters are skipped;</li>
         *   <li>a {@code "} directly followed by a comma closes the quoted section;</li>
         *   <li>any other {@code "} is left alone.</li>
         * </ul>
         * Commas seen while a quoted section is open do not split the record.
         *
         * @param input the CSV line to split; must not be {@code null}
         * @return the fields in order of appearance, quotes included
         * @throws NullPointerException if {@code input} is {@code null}
         */
        public static String[] split(String input) {
                ArrayList<String> fields = new ArrayList<>();
                final int lastIndex = input.length() - 1;
                int fieldStart = 0;
                boolean insideQuote = false;

                for (int pos = 0; pos <= lastIndex; pos++) {
                        char c = input.charAt(pos);
                        if (c == ',') {
                                // Only a comma outside quotes terminates the current field.
                                if (!insideQuote) {
                                        fields.add(input.substring(fieldStart, pos));
                                        fieldStart = pos + 1;
                                }
                        } else if (c == '"') {
                                if (pos == fieldStart) {
                                        // Quote as the very first character of a field: opening quote.
                                        insideQuote = true;
                                } else if (pos < lastIndex && input.charAt(pos + 1) == '"') {
                                        // Escaped quote (""): step over the second quote so it is
                                        // not mistaken for a closing quote on the next iteration.
                                        pos++;
                                } else if (pos < lastIndex && input.charAt(pos + 1) == ',') {
                                        // Quote right before a comma: closing quote.
                                        insideQuote = false;
                                }
                        }
                }

                // Whatever follows the last split point is the final field (possibly empty).
                fields.add(input.substring(fieldStart));
                return fields.toArray(new String[0]);
        }

        /**
         * Splits {@code input} at every comma, ignoring quotes entirely.
         *
         * <p>Empty fields are preserved, including leading and trailing ones.
         *
         * @param input the CSV line to split; must not be {@code null}
         * @return the fields in order of appearance
         * @throws NullPointerException if {@code input} is {@code null}
         */
        public static String[] splitNoQuote(String input) {
                // A negative limit keeps trailing empty fields, which the default
                // String.split(",") would silently drop.
                return input.split(",", -1);
        }
}