1
2
3
4
5
6
7
8
9
10
11 package org.csveed.token;
12
13 import static org.csveed.token.EncounteredSymbol.END_OF_FILE_SYMBOL;
14 import static org.csveed.token.EncounteredSymbol.EOL_SYMBOL;
15 import static org.csveed.token.EncounteredSymbol.EOL_SYMBOL_TRASH;
16 import static org.csveed.token.EncounteredSymbol.ESCAPE_SYMBOL;
17 import static org.csveed.token.EncounteredSymbol.OTHER_SYMBOL;
18 import static org.csveed.token.EncounteredSymbol.QUOTE_SYMBOL;
19
20 import java.util.Map;
21 import java.util.TreeMap;
22
23 import org.csveed.report.CsvException;
24 import org.csveed.report.GeneralError;
25 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory;
27
28
29
30
31 public class SymbolMapping {
32
33
34 private static final Logger LOG = LoggerFactory.getLogger(SymbolMapping.class);
35
36
37 private Map<EncounteredSymbol, char[]> symbolToChars = new TreeMap<>();
38
39
40 private Map<Character, EncounteredSymbol> charToSymbol = new TreeMap<>();
41
42
43 private Character escapeCharacter;
44
45
46 private Character quoteCharacter;
47
48
49 private boolean settingsLogged;
50
51
52 private int startLine = 1;
53
54
55 private boolean skipCommentLines = true;
56
57
58
59
60
61
62 private char acceptedEndOfLine;
63
64
65
66
67 public SymbolMapping() {
68 initDefaultMapping();
69 }
70
71
72
73
74 public void initDefaultMapping() {
75 addMapping(EncounteredSymbol.ESCAPE_SYMBOL, '"');
76 addMapping(EncounteredSymbol.QUOTE_SYMBOL, '"');
77 addMapping(EncounteredSymbol.SEPARATOR_SYMBOL, ';');
78 addMapping(EncounteredSymbol.EOL_SYMBOL, new char[] { '\r', '\n' });
79 addMapping(EncounteredSymbol.SPACE_SYMBOL, ' ');
80 addMapping(EncounteredSymbol.BOM_SYMBOL, '\uFEFF');
81 addMapping(EncounteredSymbol.COMMENT_SYMBOL, '#');
82 }
83
84
85
86
87
88
89
90
91
92 public char getFirstMappedCharacter(EncounteredSymbol encounteredSymbol) {
93 char[] mappedCharacters = getMappedCharacters(encounteredSymbol);
94 return mappedCharacters == null ? 0 : mappedCharacters[0];
95 }
96
97
98
99
100
101
102
103
104
105 public char[] getMappedCharacters(EncounteredSymbol encounteredSymbol) {
106 return symbolToChars.get(encounteredSymbol);
107 }
108
109
110
111
112
113
114
115
116
117 public void addMapping(EncounteredSymbol symbol, Character character) {
118 addMapping(symbol, new char[] { character });
119 if (symbol.isCheckForSimilarEscapeAndQuote()) {
120 storeCharacterForLaterComparison(symbol, character);
121 }
122 }
123
124
125
126
127
128
129
130
131
132 public void addMapping(EncounteredSymbol symbol, char[] characters) {
133 while (charToSymbol.values().remove(symbol)) {
134
135 }
136 for (Character character : characters) {
137 charToSymbol.put(character, symbol);
138 }
139 symbolToChars.put(symbol, characters);
140 }
141
142
143
144
145 public void logSettings() {
146 if (settingsLogged) {
147 return;
148 }
149 LOG.info("- CSV config / skip comment lines? {}", isSkipCommentLines() ? "yes" : "no");
150 LOG.info("- CSV config / start line: {}", startLine);
151 for (Map.Entry<EncounteredSymbol, char[]> entry : symbolToChars.entrySet()) {
152 char[] characters = entry.getValue();
153 if (LOG.isInfoEnabled()) {
154 LOG.info("- CSV config / Characters for {} {}", entry.getKey(), charactersToString(characters));
155 }
156 }
157 settingsLogged = true;
158 }
159
160
161
162
163
164
165
166
167
168 private String charactersToString(char[] characters) {
169 StringBuilder returnString = new StringBuilder();
170 for (char currentChar : characters) {
171 returnString.append(charToPrintable(currentChar));
172 returnString.append(" ");
173 }
174 return returnString.toString();
175 }
176
177
178
179
180
181
182
183
184
185 private String charToPrintable(char character) {
186 switch (character) {
187 case '\t':
188 return "\\t";
189 case '\n':
190 return "\\n";
191 case '\r':
192 return "\\r";
193 default:
194 return Character.toString(character);
195 }
196 }
197
198
199
200
201
202
203
204
205
206 private void storeCharacterForLaterComparison(EncounteredSymbol symbol, Character character) {
207 if (symbol == ESCAPE_SYMBOL) {
208 escapeCharacter = character;
209 } else if (symbol == QUOTE_SYMBOL) {
210 quoteCharacter = character;
211 }
212 }
213
214
215
216
217
218
219 public boolean isSameCharactersForEscapeAndQuote() {
220 return escapeCharacter != null && quoteCharacter != null && escapeCharacter.equals(quoteCharacter);
221 }
222
223
224
225
226
227
228
229
230
231
232
233 public EncounteredSymbol find(int character, ParseState parseState) {
234 if (character == -1) {
235 return END_OF_FILE_SYMBOL;
236 }
237 EncounteredSymbol symbol = charToSymbol.get((char) character);
238 if (symbol == null) {
239 return OTHER_SYMBOL;
240 }
241 if (symbol == EOL_SYMBOL) {
242 if (acceptedEndOfLine == 0) {
243 LOG.info("- Triggering EOL character: {}", character);
244 acceptedEndOfLine = (char) character;
245 }
246 if (acceptedEndOfLine != character) {
247 symbol = EOL_SYMBOL_TRASH;
248 }
249 }
250 if (symbol.isCheckForSimilarEscapeAndQuote() && isSameCharactersForEscapeAndQuote()) {
251 return parseState.isUpgradeQuoteToEscape() ? ESCAPE_SYMBOL : QUOTE_SYMBOL;
252 }
253 return symbol;
254 }
255
256
257
258
259
260
261 public int getStartLine() {
262 return startLine;
263 }
264
265
266
267
268
269
270
271 public void setStartLine(int startLine) {
272 if (startLine == 0) {
273 throw new CsvException(new GeneralError("Row cannot be set at 0. Rows are 1-based"));
274 }
275 this.startLine = startLine;
276 }
277
278
279
280
281
282
283 public boolean isSkipCommentLines() {
284 return skipCommentLines;
285 }
286
287
288
289
290
291
292
293 public void setSkipCommentLines(boolean skipCommentLines) {
294 this.skipCommentLines = skipCommentLines;
295 }
296 }