SymbolMapping.java

/*
 * CSVeed (https://github.com/42BV/CSVeed)
 *
 * Copyright 2013-2023 CSVeed.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of The Apache Software License,
 * Version 2.0 which accompanies this distribution, and is available at
 * https://www.apache.org/licenses/LICENSE-2.0.txt
 */
package org.csveed.token;

import static org.csveed.token.EncounteredSymbol.END_OF_FILE_SYMBOL;
import static org.csveed.token.EncounteredSymbol.EOL_SYMBOL;
import static org.csveed.token.EncounteredSymbol.EOL_SYMBOL_TRASH;
import static org.csveed.token.EncounteredSymbol.ESCAPE_SYMBOL;
import static org.csveed.token.EncounteredSymbol.OTHER_SYMBOL;
import static org.csveed.token.EncounteredSymbol.QUOTE_SYMBOL;

import java.util.Map;
import java.util.TreeMap;

import org.csveed.report.CsvException;
import org.csveed.report.GeneralError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bidirectional mapping between {@link EncounteredSymbol}s and the characters that represent them.
 * <p>
 * The mapping is kept in two directions: symbol to characters (used for reporting and for looking up the configured
 * characters) and character to symbol (used by {@link #find(int, ParseState)} to classify the characters read from
 * the input). Besides the mapping, this class also holds the start line and the skip-comment-lines setting.
 * <p>
 * Instances are mutable and perform no synchronization.
 */
public class SymbolMapping {

    /** The Constant LOG. */
    private static final Logger LOG = LoggerFactory.getLogger(SymbolMapping.class);

    /** The characters configured per symbol. The stored arrays are private copies and are never handed out. */
    private final Map<EncounteredSymbol, char[]> symbolToChars = new TreeMap<>();

    /** Reverse lookup: the symbol a character resolves to. A character resolves to at most one symbol. */
    private final Map<Character, EncounteredSymbol> charToSymbol = new TreeMap<>();

    /** The escape character as last set through {@link #addMapping(EncounteredSymbol, Character)}. */
    private Character escapeCharacter;

    /** The quote character as last set through {@link #addMapping(EncounteredSymbol, Character)}. */
    private Character quoteCharacter;

    /** Whether {@link #logSettings()} has already run; the settings are reported only once. */
    private boolean settingsLogged;

    /** The 1-based start line. */
    private int startLine = 1;

    /** The skip comment lines. */
    private boolean skipCommentLines = true;

    /**
     * The accepted end of line; {@code 0} as long as no end-of-line character has been encountered.
     * <p>
     * When multiple EOL characters have been given, only the first one encountered will be accepted.
     */
    private char acceptedEndOfLine;

    /**
     * Instantiates a new symbol mapping, populated with the default mapping.
     */
    public SymbolMapping() {
        initDefaultMapping();
    }

    /**
     * Installs the default mapping: double quote for both escape and quote, semicolon as separator, CR and LF as
     * end-of-line characters, space, the byte order mark and {@code #} as comment character.
     */
    public void initDefaultMapping() {
        addMapping(EncounteredSymbol.ESCAPE_SYMBOL, '"');
        addMapping(EncounteredSymbol.QUOTE_SYMBOL, '"');
        addMapping(EncounteredSymbol.SEPARATOR_SYMBOL, ';');
        addMapping(EncounteredSymbol.EOL_SYMBOL, new char[] { '\r', '\n' });
        addMapping(EncounteredSymbol.SPACE_SYMBOL, ' ');
        addMapping(EncounteredSymbol.BOM_SYMBOL, '\uFEFF');
        addMapping(EncounteredSymbol.COMMENT_SYMBOL, '#');
    }

    /**
     * Gets the first character mapped to the given symbol.
     *
     * @param encounteredSymbol
     *            the encountered symbol
     *
     * @return the first mapped character, or {@code 0} when the symbol has no mapping or is mapped to an empty set
     *         of characters
     */
    public char getFirstMappedCharacter(EncounteredSymbol encounteredSymbol) {
        // Read the internal array directly; only a single char leaves this method, so no copy is needed.
        char[] mappedCharacters = symbolToChars.get(encounteredSymbol);
        if (mappedCharacters == null || mappedCharacters.length == 0) {
            return 0;
        }
        return mappedCharacters[0];
    }

    /**
     * Gets the characters mapped to the given symbol.
     *
     * @param encounteredSymbol
     *            the encountered symbol
     *
     * @return a copy of the mapped characters, or {@code null} when the symbol has no mapping
     */
    public char[] getMappedCharacters(EncounteredSymbol encounteredSymbol) {
        char[] mappedCharacters = symbolToChars.get(encounteredSymbol);
        // Hand out a copy so callers cannot change the mapping behind the back of the reverse lookup.
        return mappedCharacters == null ? null : mappedCharacters.clone();
    }

    /**
     * Maps a symbol to a single character, replacing any characters previously mapped to that symbol.
     * <p>
     * When the symbol reports {@code isCheckForSimilarEscapeAndQuote()}, the character is additionally remembered as
     * escape or quote character for {@link #isSameCharactersForEscapeAndQuote()}. The array-based overload does not
     * do this.
     *
     * @param symbol
     *            the symbol
     * @param character
     *            the character
     */
    public void addMapping(EncounteredSymbol symbol, Character character) {
        addMapping(symbol, new char[] { character });
        if (symbol.isCheckForSimilarEscapeAndQuote()) {
            storeCharacterForLaterComparison(symbol, character);
        }
    }

    /**
     * Maps a symbol to a set of characters, replacing any characters previously mapped to that symbol.
     * <p>
     * A character that was mapped to another symbol before resolves to the given symbol afterwards. The passed array
     * is copied; later changes to it do not affect the mapping.
     *
     * @param symbol
     *            the symbol
     * @param characters
     *            the characters
     */
    public void addMapping(EncounteredSymbol symbol, char[] characters) {
        // Copy first: a null array fails here, before any existing mapping has been touched.
        char[] mappedCharacters = characters.clone();
        while (charToSymbol.values().remove(symbol)) {
            // Looping until all symbols removed
        }
        for (char character : mappedCharacters) {
            charToSymbol.put(character, symbol);
        }
        symbolToChars.put(symbol, mappedCharacters);
    }

    /**
     * Logs the current settings at info level. Only the first call has an effect; subsequent calls return
     * immediately.
     */
    public void logSettings() {
        if (settingsLogged) {
            return;
        }
        LOG.info("- CSV config / skip comment lines? {}", isSkipCommentLines() ? "yes" : "no");
        LOG.info("- CSV config / start line: {}", startLine);
        // Guarded so the printable representation is only built when it will actually be logged.
        if (LOG.isInfoEnabled()) {
            for (Map.Entry<EncounteredSymbol, char[]> entry : symbolToChars.entrySet()) {
                LOG.info("- CSV config / Characters for {} {}", entry.getKey(), charactersToString(entry.getValue()));
            }
        }
        settingsLogged = true;
    }

    /**
     * Renders the characters in printable form, each followed by a single space.
     *
     * @param characters
     *            the characters
     *
     * @return the string
     */
    private String charactersToString(char[] characters) {
        StringBuilder returnString = new StringBuilder();
        for (char currentChar : characters) {
            returnString.append(charToPrintable(currentChar)).append(" ");
        }
        return returnString.toString();
    }

    /**
     * Renders a character in printable form: tab, line feed and carriage return are shown as their escape
     * sequences, every other character is shown as is.
     *
     * @param character
     *            the character
     *
     * @return the string
     */
    private String charToPrintable(char character) {
        switch (character) {
            case '\t':
                return "\\t";
            case '\n':
                return "\\n";
            case '\r':
                return "\\r";
            default:
                return Character.toString(character);
        }
    }

    /**
     * Remembers the character when the symbol is the escape or the quote symbol; other symbols are ignored.
     *
     * @param symbol
     *            the symbol
     * @param character
     *            the character
     */
    private void storeCharacterForLaterComparison(EncounteredSymbol symbol, Character character) {
        if (symbol == ESCAPE_SYMBOL) {
            escapeCharacter = character;
        } else if (symbol == QUOTE_SYMBOL) {
            quoteCharacter = character;
        }
    }

    /**
     * Checks if is same characters for escape and quote.
     *
     * @return true, if both the escape and the quote character have been set and they are equal
     */
    public boolean isSameCharactersForEscapeAndQuote() {
        return escapeCharacter != null && quoteCharacter != null && escapeCharacter.equals(quoteCharacter);
    }

    /**
     * Resolves a character read from the input to the symbol it represents.
     *
     * @param character
     *            the character as read from the input, or {@code -1} for end of input
     * @param parseState
     *            the parse state; consulted only when escape and quote share the same character
     *
     * @return {@code END_OF_FILE_SYMBOL} for {@code -1}, {@code OTHER_SYMBOL} for unmapped characters, otherwise the
     *         mapped symbol, adjusted for the end-of-line and escape/quote rules described in the method body
     */
    public EncounteredSymbol find(int character, ParseState parseState) {
        if (character == -1) {
            return END_OF_FILE_SYMBOL;
        }
        EncounteredSymbol symbol = charToSymbol.get((char) character);
        if (symbol == null) {
            return OTHER_SYMBOL;
        }
        if (symbol == EOL_SYMBOL) {
            // The first end-of-line character encountered becomes the accepted one ...
            if (acceptedEndOfLine == 0) {
                LOG.info("- Triggering EOL character: {}", character);
                acceptedEndOfLine = (char) character;
            }
            // ... and every other configured end-of-line character is reported as trash.
            if (acceptedEndOfLine != character) {
                symbol = EOL_SYMBOL_TRASH;
            }
        }
        // With one character serving as both escape and quote, the reverse lookup cannot tell them apart;
        // the parse state decides which of the two is meant.
        if (symbol.isCheckForSimilarEscapeAndQuote() && isSameCharactersForEscapeAndQuote()) {
            return parseState.isUpgradeQuoteToEscape() ? ESCAPE_SYMBOL : QUOTE_SYMBOL;
        }
        return symbol;
    }

    /**
     * Gets the start line.
     *
     * @return the 1-based start line
     */
    public int getStartLine() {
        return startLine;
    }

    /**
     * Sets the start line.
     *
     * @param startLine
     *            the new start line, 1-based
     *
     * @throws CsvException
     *             when the start line is smaller than 1
     */
    public void setStartLine(int startLine) {
        if (startLine < 1) {
            throw new CsvException(new GeneralError("Row cannot be set at " + startLine + ". Rows are 1-based"));
        }
        this.startLine = startLine;
    }

    /**
     * Checks if is skip comment lines.
     *
     * @return true, if is skip comment lines
     */
    public boolean isSkipCommentLines() {
        return skipCommentLines;
    }

    /**
     * Sets the skip comment lines.
     *
     * @param skipCommentLines
     *            the new skip comment lines
     */
    public void setSkipCommentLines(boolean skipCommentLines) {
        this.skipCommentLines = skipCommentLines;
    }
}