/* * Copyright (C) 2016 Enrico Mariotti * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 or (at your option) * version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include "core/Tools.h" #include "CsvParser.h" CsvParser::CsvParser() : m_ch(0) , m_comment('#') , m_currCol(1) , m_currRow(1) , m_isBackslashSyntax(false) , m_isEof(false) , m_isFileLoaded(false) , m_isGood(true) , m_lastPos(-1) , m_maxCols(0) , m_qualifier('"') , m_separator(',') , m_statusMsg("") { m_csv.setBuffer(&m_array); m_ts.setDevice(&m_csv); m_csv.open(QIODevice::ReadOnly); m_ts.setCodec("UTF-8"); } CsvParser::~CsvParser() { m_csv.close(); } bool CsvParser::isFileLoaded() { return m_isFileLoaded; } bool CsvParser::reparse() { reset(); return parseFile(); } bool CsvParser::parse(QFile *device) { clear(); if (nullptr == device) { m_statusMsg += QObject::tr("NULL device\n"); return false; } if (!readFile(device)) { return false; } return parseFile(); } bool CsvParser::readFile(QFile *device) { if (device->isOpen()) { device->close(); } device->open(QIODevice::ReadOnly); if (!Tools::readAllFromDevice(device, m_array)) { m_statusMsg += QObject::tr("Error reading from device\n"); m_isFileLoaded = false; } else { device->close(); m_array.replace("\r\n", "\n"); m_array.replace("\r", "\n"); if (0 == m_array.size()) { m_statusMsg += QObject::tr("File empty\n"); } m_isFileLoaded = true; } return m_isFileLoaded; } void CsvParser::reset() { m_ch = 0; m_currCol = 1; m_currRow = 1; m_isEof = false; m_isGood = true; m_lastPos = -1; m_maxCols = 0; m_statusMsg = ""; m_ts.seek(0); m_table.clear(); //the following are users' concern :) //m_comment = '#'; //m_backslashSyntax = false; //m_comment = '#'; //m_qualifier = '"'; //m_separator = ','; } void CsvParser::clear() { reset(); m_isFileLoaded = false; m_array.clear(); } bool CsvParser::parseFile() { parseRecord(); while (!m_isEof) { if (!skipEndline()) { appendStatusMsg(QObject::tr("malformed string")); } m_currRow++; m_currCol = 1; parseRecord(); } fillColumns(); return m_isGood; } void CsvParser::parseRecord() { csvrow row; if (isComment()) { skipLine(); return; } else { do { parseField(row); getChar(m_ch); } while (isSeparator(m_ch) && !m_isEof); if (!m_isEof) { ungetChar(); } if (isEmptyRow(row)) { row.clear(); return; } m_table.push_back(row); if (m_maxCols < row.size()) { m_maxCols = row.size(); } m_currCol++; } } void CsvParser::parseField(csvrow& row) { QString field; peek(m_ch); if (!isTerminator(m_ch)) { if (isQualifier(m_ch)) { parseQuoted(field); } else { parseSimple(field); } } row.push_back(field); } void CsvParser::parseSimple(QString &s) { QChar c; getChar(c); while ((isText(c)) && (!m_isEof)) { s.append(c); getChar(c); } if (!m_isEof) { ungetChar(); } } void CsvParser::parseQuoted(QString &s) { //read and discard initial qualifier (e.g. quote) getChar(m_ch); parseEscaped(s); //getChar(m_ch); if (!isQualifier(m_ch)) { appendStatusMsg(QObject::tr("missing closing quote")); } } void CsvParser::parseEscaped(QString &s) { parseEscapedText(s); while (processEscapeMark(s, m_ch)) { parseEscapedText(s); } if (!m_isEof) { ungetChar(); } } void CsvParser::parseEscapedText(QString &s) { getChar(m_ch); while ((!isQualifier(m_ch)) && !m_isEof) { s.append(m_ch); getChar(m_ch); } } bool CsvParser::processEscapeMark(QString &s, QChar c) { QChar buf; peek(buf); QChar c2; //escape-character syntax, e.g. \" if (true == m_isBackslashSyntax) { if (c != '\\') { return false; } //consume (and append) second qualifier getChar(c2); if (m_isEof){ c2='\\'; s.append('\\'); return false; } else { s.append(c2); return true; } } //double quote syntax, e.g. "" else { if (!isQualifier(c)) { return false; } peek(c2); if (!m_isEof) { //not EOF, can read one char if (isQualifier(c2)) { s.append(c2); getChar(c2); return true; } } return false; } } void CsvParser::fillColumns() { //fill the rows with lesser columns with empty fields for (int i=0; i 0) { csvrow r = m_table.at(i); for (int j=0; j> c; } } void CsvParser::ungetChar() { if (!m_ts.seek(m_lastPos)) m_statusMsg += QObject::tr("Internal: unget lower bound exceeded"); } void CsvParser::peek(QChar& c) { getChar(c); if (!m_isEof) { ungetChar(); } } bool CsvParser::isQualifier(const QChar c) const { if (true == m_isBackslashSyntax && (c != m_qualifier)) { return (c == '\\'); } else { return (c == m_qualifier); } } bool CsvParser::isComment() { bool result = false; QChar c2; qint64 pos = m_ts.pos(); do { getChar(c2); } while ((isSpace(c2) || isTab(c2)) && (!m_isEof)); if (c2 == m_comment) { result = true; } m_ts.seek(pos); return result; } bool CsvParser::isText(QChar c) const { return !( (isCRLF(c)) || (isSeparator(c)) ); } bool CsvParser::isEmptyRow(csvrow row) const { csvrow::const_iterator it = row.constBegin(); for (; it != row.constEnd(); ++it) { if ( ((*it) != "\n") && ((*it) != "") ) return false; } return true; } bool CsvParser::isCRLF(const QChar c) const { return (c == '\n'); } bool CsvParser::isSpace(const QChar c) const { return (c == 0x20); } bool CsvParser::isTab(const QChar c) const { return (c == '\t'); } bool CsvParser::isSeparator(const QChar c) const { return (c == m_separator); } bool CsvParser::isTerminator(const QChar c) const { return (isSeparator(c) || (c == '\n') || (c == '\r')); } void CsvParser::setBackslashSyntax(bool set) { m_isBackslashSyntax = set; } void CsvParser::setComment(const QChar c) { m_comment = c.unicode(); } void CsvParser::setCodec(const QString s) { m_ts.setCodec(QTextCodec::codecForName(s.toLocal8Bit())); } void CsvParser::setFieldSeparator(const QChar c) { m_separator = c.unicode(); } void CsvParser::setTextQualifier(const QChar c) { m_qualifier = c.unicode(); } int CsvParser::getFileSize() const { return m_csv.size(); } const csvtable CsvParser::getCsvTable() const { return m_table; } QString CsvParser::getStatus() const { return m_statusMsg; } int CsvParser::getCsvCols() const { if ((m_table.size() > 0) && (m_table.at(0).size() > 0)) return m_table.at(0).size(); else return 0; } int CsvParser::getCsvRows() const { return m_table.size(); } void CsvParser::appendStatusMsg(QString s) { m_statusMsg += s .append(" @" + QString::number(m_currRow)) .append(",") .append(QString::number(m_currCol)) .append("\n"); m_isGood = false; }