/* * Copyright (C) 2016 Enrico Mariotti * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 or (at your option) * version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "CsvParser.h" #include #include #include "core/Tools.h" CsvParser::CsvParser() : m_ch(0) , m_comment('#') , m_currCol(1) , m_currRow(1) , m_isBackslashSyntax(false) , m_isEof(false) , m_isFileLoaded(false) , m_isGood(true) , m_lastPos(-1) , m_maxCols(0) , m_qualifier('"') , m_separator(',') , m_statusMsg("") { m_csv.setBuffer(&m_array); m_ts.setDevice(&m_csv); m_csv.open(QIODevice::ReadOnly); m_ts.setCodec("UTF-8"); } CsvParser::~CsvParser() { m_csv.close(); } bool CsvParser::isFileLoaded() { return m_isFileLoaded; } bool CsvParser::reparse() { reset(); return parseFile(); } bool CsvParser::parse(QFile *device) { clear(); if (nullptr == device) { m_statusMsg += QObject::tr("NULL device\n"); return false; } if (!readFile(device)) return false; return parseFile(); } bool CsvParser::readFile(QFile *device) { if (device->isOpen()) device->close(); device->open(QIODevice::ReadOnly); if (!Tools::readAllFromDevice(device, m_array)) { m_statusMsg += QObject::tr("Error reading from device\n"); m_isFileLoaded = false; } else { device->close(); m_array.replace("\r\n", "\n"); m_array.replace("\r", "\n"); if (0 == m_array.size()) { m_statusMsg += QObject::tr("File empty\n"); } m_isFileLoaded = true; } return m_isFileLoaded; } void CsvParser::reset() { m_ch = 0; m_currCol = 1; m_currRow = 1; m_isEof = false; m_isGood = true; m_lastPos = -1; m_maxCols = 0; m_statusMsg = ""; m_ts.seek(0); m_table.clear(); //the following are users' concern :) //m_comment = '#'; //m_backslashSyntax = false; //m_comment = '#'; //m_qualifier = '"'; //m_separator = ','; } void CsvParser::clear() { reset(); m_isFileLoaded = false; m_array.clear(); } bool CsvParser::parseFile() { parseRecord(); while (!m_isEof) { if (!skipEndline()) appendStatusMsg(QObject::tr("malformed string")); m_currRow++; m_currCol = 1; parseRecord(); } fillColumns(); return m_isGood; } void CsvParser::parseRecord() { CsvRow row; if (isComment()) { skipLine(); return; } do { parseField(row); getChar(m_ch); } while (isSeparator(m_ch) && !m_isEof); if (!m_isEof) ungetChar(); if (isEmptyRow(row)) { row.clear(); return; } m_table.push_back(row); if (m_maxCols < row.size()) m_maxCols = row.size(); m_currCol++; } void CsvParser::parseField(CsvRow& row) { QString field; peek(m_ch); if (!isTerminator(m_ch)) { if (isQualifier(m_ch)) parseQuoted(field); else parseSimple(field); } row.push_back(field); } void CsvParser::parseSimple(QString &s) { QChar c; getChar(c); while ((isText(c)) && (!m_isEof)) { s.append(c); getChar(c); } if (!m_isEof) ungetChar(); } void CsvParser::parseQuoted(QString &s) { //read and discard initial qualifier (e.g. quote) getChar(m_ch); parseEscaped(s); //getChar(m_ch); if (!isQualifier(m_ch)) appendStatusMsg(QObject::tr("missing closing quote")); } void CsvParser::parseEscaped(QString &s) { parseEscapedText(s); while (processEscapeMark(s, m_ch)) parseEscapedText(s); if (!m_isEof) ungetChar(); } void CsvParser::parseEscapedText(QString &s) { getChar(m_ch); while ((!isQualifier(m_ch)) && !m_isEof) { s.append(m_ch); getChar(m_ch); } } bool CsvParser::processEscapeMark(QString &s, QChar c) { QChar buf; peek(buf); QChar c2; if (true == m_isBackslashSyntax) { //escape-character syntax, e.g. \" if (c != '\\') { return false; } //consume (and append) second qualifier getChar(c2); if (m_isEof) { c2='\\'; s.append('\\'); return false; } else { s.append(c2); return true; } } else { //double quote syntax, e.g. "" if (!isQualifier(c)) return false; peek(c2); if (!m_isEof) { //not EOF, can read one char if (isQualifier(c2)) { s.append(c2); getChar(c2); return true; } } return false; } } void CsvParser::fillColumns() { //fill shorter rows with empty placeholder columns for (int i = 0; i < m_table.size(); ++i) { int gap = m_maxCols-m_table.at(i).size(); if (gap > 0) { CsvRow r = m_table.at(i); for (int j = 0; j < gap; ++j) { r.append(QString("")); } m_table.replace(i, r); } } } void CsvParser::skipLine() { m_ts.readLine(); m_ts.seek(m_ts.pos() - 1); } bool CsvParser::skipEndline() { getChar(m_ch); return (m_ch == '\n'); } void CsvParser::getChar(QChar& c) { m_isEof = m_ts.atEnd(); if (!m_isEof) { m_lastPos = m_ts.pos(); m_ts >> c; } } void CsvParser::ungetChar() { if (!m_ts.seek(m_lastPos)) m_statusMsg += QObject::tr("Internal: unget lower bound exceeded"); } void CsvParser::peek(QChar& c) { getChar(c); if (!m_isEof) ungetChar(); } bool CsvParser::isQualifier(const QChar &c) const { if (true == m_isBackslashSyntax && (c != m_qualifier)) return (c == '\\'); else return (c == m_qualifier); } bool CsvParser::isComment() { bool result = false; QChar c2; qint64 pos = m_ts.pos(); do getChar(c2); while ((isSpace(c2) || isTab(c2)) && (!m_isEof)); if (c2 == m_comment) result = true; m_ts.seek(pos); return result; } bool CsvParser::isText(QChar c) const { return !( (isCRLF(c)) || (isSeparator(c)) ); } bool CsvParser::isEmptyRow(CsvRow row) const { CsvRow::const_iterator it = row.constBegin(); for (; it != row.constEnd(); ++it) if ( ((*it) != "\n") && ((*it) != "") ) return false; return true; } bool CsvParser::isCRLF(const QChar &c) const { return (c == '\n'); } bool CsvParser::isSpace(const QChar &c) const { return (c == 0x20); } bool CsvParser::isTab(const QChar &c) const { return (c == '\t'); } bool CsvParser::isSeparator(const QChar &c) const { return (c == m_separator); } bool CsvParser::isTerminator(const QChar &c) const { return (isSeparator(c) || (c == '\n') || (c == '\r')); } void CsvParser::setBackslashSyntax(bool set) { m_isBackslashSyntax = set; } void CsvParser::setComment(const QChar &c) { m_comment = c.unicode(); } void CsvParser::setCodec(const QString &s) { m_ts.setCodec(QTextCodec::codecForName(s.toLocal8Bit())); } void CsvParser::setFieldSeparator(const QChar &c) { m_separator = c.unicode(); } void CsvParser::setTextQualifier(const QChar &c) { m_qualifier = c.unicode(); } int CsvParser::getFileSize() const { return m_csv.size(); } const CsvTable CsvParser::getCsvTable() const { return m_table; } QString CsvParser::getStatus() const { if (m_statusMsg.size() > 100) return m_statusMsg.section('\n', 0, 4) .append("\n[...]\n").append(QObject::tr("More messages, skipped!")); return m_statusMsg; } int CsvParser::getCsvCols() const { if ((m_table.size() > 0) && (m_table.at(0).size() > 0)) return m_table.at(0).size(); else return 0; } int CsvParser::getCsvRows() const { return m_table.size(); } void CsvParser::appendStatusMsg(QString s) { m_statusMsg += s .append(" @" + QString::number(m_currRow)) .append(",") .append(QString::number(m_currCol)) .append("\n"); m_isGood = false; }