/*
 *  Copyright (C) 2016 Enrico Mariotti <enricomariotti@yahoo.it>
 *  Copyright (C) 2017 KeePassXC Team <team@keepassxc.org>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 2 or (at your option)
 *  version 3 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
#include "CsvParser.h"
|
|
|
|
|
2017-01-08 19:33:21 -05:00
|
|
|
#include <QTextCodec>
|
|
|
|
#include <QObject>
|
2017-02-17 19:51:31 -05:00
|
|
|
|
2017-01-08 19:33:21 -05:00
|
|
|
#include "core/Tools.h"
|
|
|
|
|
|
|
|
// Creates a parser with the default dialect: '#' starts a comment, '"' is the
// text qualifier, ',' is the field separator and backslash escaping is off.
CsvParser::CsvParser()
    : m_ch(0),
      m_comment('#'),
      m_currCol(1),
      m_currRow(1),
      m_isBackslashSyntax(false),
      m_isEof(false),
      m_isFileLoaded(false),
      m_isGood(true),
      m_lastPos(-1),
      m_maxCols(0),
      m_qualifier('"'),
      m_separator(','),
      m_statusMsg("") {
    // m_csv reads from m_array, and the text stream m_ts reads from m_csv.
    m_csv.setBuffer(&m_array);
    m_ts.setDevice(&m_csv);
    m_csv.open(QIODevice::ReadOnly);
    // UTF-8 is the initial codec; setCodec() can replace it later.
    m_ts.setCodec("UTF-8");
}
|
|
|
|
|
|
|
|
// Closes the device that was opened in the constructor.
CsvParser::~CsvParser()
{
    m_csv.close();
}
|
|
|
|
|
|
|
|
bool CsvParser::isFileLoaded() {
|
|
|
|
return m_isFileLoaded;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool CsvParser::reparse() {
|
|
|
|
reset();
|
|
|
|
return parseFile();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Loads the content of `device` and parses it into the table.
// Any previous state and data are discarded first.
// Returns false if `device` is null, if reading fails or if parsing
// reports a critical error; the details are available via getStatus().
bool CsvParser::parse(QFile *device)
{
    clear();

    if (device == nullptr) {
        appendStatusMsg(QObject::tr("NULL device"), true);
        return false;
    }

    // parseFile() only runs when the file could be read.
    return readFile(device) && parseFile();
}
|
|
|
|
|
|
|
|
// Reads the whole content of `device` into m_array and normalizes all line
// endings to '\n'.
// The device is (re)opened read-only and is always closed again before
// returning, whether or not reading succeeded.
// Returns true and sets m_isFileLoaded on success; on failure a critical
// status message is recorded and false is returned.
bool CsvParser::readFile(QFile *device) {
    // Re-open the device so that it is in read-only mode regardless of how
    // the caller left it.
    if (device->isOpen())
        device->close();

    // Do not attempt to read from a device that could not be opened.
    const bool opened = device->open(QIODevice::ReadOnly);
    const bool readOk = opened && Tools::readAllFromDevice(device, m_array);

    // Release the file handle on both the success and the failure path.
    if (device->isOpen())
        device->close();

    if (!readOk) {
        appendStatusMsg(QObject::tr("error reading from device"), true);
        m_isFileLoaded = false;
        return m_isFileLoaded;
    }

    // "\r\n" must be replaced before lone "\r", otherwise each Windows line
    // ending would turn into two newlines.
    m_array.replace("\r\n", "\n");
    m_array.replace("\r", "\n");

    // An empty file is reported but is not treated as an error.
    if (0 == m_array.size())
        appendStatusMsg(QObject::tr("file empty !\n"));

    m_isFileLoaded = true;
    return m_isFileLoaded;
}
|
|
|
|
|
|
|
|
// Puts the parsing state back to its initial values and rewinds the stream.
// The loaded bytes (m_array) and the dialect settings (comment character,
// qualifier, separator, backslash syntax) are intentionally left untouched:
// those are chosen by the user of the class.
void CsvParser::reset()
{
    // position tracking
    m_ch = 0;
    m_lastPos = -1;
    m_currRow = 1;
    m_currCol = 1;

    // status of the last run
    m_isEof = false;
    m_isGood = true;
    m_statusMsg = "";

    // parsed result
    m_maxCols = 0;
    m_table.clear();

    m_ts.seek(0);
}
|
|
|
|
|
|
|
|
// Resets the parsing state like reset() and additionally drops the loaded data.
void CsvParser::clear()
{
    reset();
    m_array.clear();
    m_isFileLoaded = false;
}
|
|
|
|
|
|
|
|
bool CsvParser::parseFile() {
|
|
|
|
parseRecord();
|
2017-02-17 19:51:31 -05:00
|
|
|
while (!m_isEof) {
|
|
|
|
if (!skipEndline())
|
2017-02-21 19:03:22 -05:00
|
|
|
appendStatusMsg(QObject::tr("malformed string"), true);
|
2017-01-08 19:33:21 -05:00
|
|
|
m_currRow++;
|
|
|
|
m_currCol = 1;
|
|
|
|
parseRecord();
|
|
|
|
}
|
|
|
|
fillColumns();
|
|
|
|
return m_isGood;
|
|
|
|
}
|
|
|
|
|
|
|
|
void CsvParser::parseRecord() {
|
2017-02-17 19:51:31 -05:00
|
|
|
CsvRow row;
|
2017-01-08 19:33:21 -05:00
|
|
|
if (isComment()) {
|
|
|
|
skipLine();
|
|
|
|
return;
|
|
|
|
}
|
2017-02-17 19:51:31 -05:00
|
|
|
do {
|
|
|
|
parseField(row);
|
|
|
|
getChar(m_ch);
|
|
|
|
} while (isSeparator(m_ch) && !m_isEof);
|
2017-01-08 19:33:21 -05:00
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
if (!m_isEof)
|
|
|
|
ungetChar();
|
|
|
|
if (isEmptyRow(row)) {
|
|
|
|
row.clear();
|
|
|
|
return;
|
2017-01-08 19:33:21 -05:00
|
|
|
}
|
2017-02-17 19:51:31 -05:00
|
|
|
m_table.push_back(row);
|
|
|
|
if (m_maxCols < row.size())
|
|
|
|
m_maxCols = row.size();
|
|
|
|
m_currCol++;
|
2017-01-08 19:33:21 -05:00
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
// Parses a single field and appends it to `row`.
// The next character decides how the field is read: a terminator yields an
// empty field, a qualifier starts a quoted field and anything else starts a
// plain field.
void CsvParser::parseField(CsvRow& row)
{
    QString field;

    peek(m_ch);
    const bool hasContent = !isTerminator(m_ch);
    if (hasContent && isQualifier(m_ch)) {
        parseQuoted(field);
    } else if (hasContent) {
        parseSimple(field);
    }

    row.push_back(field);
}
|
|
|
|
|
|
|
|
// Reads an unquoted field: characters are appended to `s` until a newline,
// a separator or the end of the stream is reached.
// The character that ended the field is put back into the stream.
void CsvParser::parseSimple(QString &s)
{
    QChar current;
    for (getChar(current); isText(current) && !m_isEof; getChar(current)) {
        s.append(current);
    }

    if (m_isEof)
        return;
    ungetChar();
}
|
|
|
|
|
|
|
|
// Reads a quoted field into `s`.
// A critical "missing closing quote" error is recorded when the last character
// read is not a qualifier.
void CsvParser::parseQuoted(QString &s)
{
    // consume the opening qualifier (e.g. quote); it is not part of the field
    getChar(m_ch);

    parseEscaped(s);

    const bool closedProperly = isQualifier(m_ch);
    if (!closedProperly)
        appendStatusMsg(QObject::tr("missing closing quote"), true);
}
|
|
|
|
|
|
|
|
// Reads the body of a quoted field into `s`.
// Runs of ordinary text alternate with escape marks; the loop ends as soon as
// processEscapeMark() reports that the character after the text is not an
// escape mark.
void CsvParser::parseEscaped(QString &s)
{
    do {
        parseEscapedText(s);
    } while (processEscapeMark(s, m_ch));

    if (m_isEof)
        return;
    ungetChar();
}
|
|
|
|
|
|
|
|
// Appends characters to `s` until a qualifier or the end of the stream is hit.
// The character that stopped the loop stays in m_ch for the caller.
void CsvParser::parseEscapedText(QString &s)
{
    for (getChar(m_ch); !isQualifier(m_ch) && !m_isEof; getChar(m_ch)) {
        s.append(m_ch);
    }
}
|
|
|
|
|
|
|
|
// Decides whether `c`, the character that ended a run of quoted text, starts
// an escape sequence, and if so appends the escaped character to `s`.
//   - backslash syntax: `c` must be a backslash; the character after it is
//     consumed and appended. A backslash at the very end of the stream is
//     appended literally and ends the field.
//   - double-qualifier syntax: `c` must be a qualifier that is directly
//     followed by a second qualifier; one qualifier is appended and the second
//     one is consumed.
// Returns true when an escape sequence was handled and more text may follow,
// false when the quoted text ends here.
bool CsvParser::processEscapeMark(QString &s, QChar c)
{
    // The value read here is not used, but the call must stay: peek() goes
    // through getChar(), which refreshes m_isEof and m_lastPos, and
    // parseEscaped() evaluates m_isEof and calls ungetChar() right after this
    // function returns false.
    QChar lookahead;
    peek(lookahead);

    if (m_isBackslashSyntax) {
        // escape-character syntax, e.g. \"
        if (c != '\\')
            return false;

        // consume the escaped character
        QChar escaped;
        getChar(escaped);
        if (m_isEof) {
            // nothing follows the backslash: keep it as a literal character
            s.append('\\');
            return false;
        }
        s.append(escaped);
        return true;
    }

    // double quote syntax, e.g. ""
    if (!isQualifier(c))
        return false;

    QChar following;
    peek(following);
    if (m_isEof || !isQualifier(following))
        return false;

    // a doubled qualifier stands for one literal qualifier
    s.append(following);
    getChar(following);
    return true;
}
|
|
|
|
|
|
|
|
void CsvParser::fillColumns() {
|
2017-02-17 19:51:31 -05:00
|
|
|
//fill shorter rows with empty placeholder columns
|
|
|
|
for (int i = 0; i < m_table.size(); ++i) {
|
2017-01-08 19:33:21 -05:00
|
|
|
int gap = m_maxCols-m_table.at(i).size();
|
|
|
|
if (gap > 0) {
|
2017-02-17 19:51:31 -05:00
|
|
|
CsvRow r = m_table.at(i);
|
|
|
|
for (int j = 0; j < gap; ++j) {
|
2017-01-08 19:33:21 -05:00
|
|
|
r.append(QString(""));
|
|
|
|
}
|
|
|
|
m_table.replace(i, r);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CsvParser::skipLine() {
|
|
|
|
m_ts.readLine();
|
2017-02-17 19:51:31 -05:00
|
|
|
m_ts.seek(m_ts.pos() - 1);
|
2017-01-08 19:33:21 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
bool CsvParser::skipEndline() {
|
|
|
|
getChar(m_ch);
|
|
|
|
return (m_ch == '\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Reads the next character of the stream into `c`.
// m_isEof is refreshed on every call; when the end of the stream is reached,
// `c` and m_lastPos are left unchanged. Otherwise the position before the read
// is remembered in m_lastPos so that ungetChar() can return to it.
void CsvParser::getChar(QChar& c)
{
    m_isEof = m_ts.atEnd();
    if (m_isEof)
        return;

    m_lastPos = m_ts.pos();
    m_ts >> c;
}
|
|
|
|
|
|
|
|
void CsvParser::ungetChar() {
|
|
|
|
if (!m_ts.seek(m_lastPos))
|
2017-02-21 19:03:22 -05:00
|
|
|
appendStatusMsg(QObject::tr("INTERNAL - unget lower bound exceeded"), true);
|
2017-01-08 19:33:21 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Reads the next character into `c` without consuming it.
// Like getChar(), this refreshes m_isEof and m_lastPos; at the end of the
// stream `c` is left unchanged.
void CsvParser::peek(QChar& c)
{
    getChar(c);
    if (m_isEof)
        return;
    ungetChar();
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
bool CsvParser::isQualifier(const QChar &c) const {
|
|
|
|
if (true == m_isBackslashSyntax && (c != m_qualifier))
|
2017-01-08 19:33:21 -05:00
|
|
|
return (c == '\\');
|
2017-02-17 19:51:31 -05:00
|
|
|
else
|
|
|
|
return (c == m_qualifier);
|
2017-01-08 19:33:21 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
bool CsvParser::isComment() {
|
|
|
|
bool result = false;
|
|
|
|
QChar c2;
|
|
|
|
qint64 pos = m_ts.pos();
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
do getChar(c2);
|
|
|
|
while ((isSpace(c2) || isTab(c2)) && (!m_isEof));
|
2017-01-08 19:33:21 -05:00
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
if (c2 == m_comment)
|
2017-01-08 19:33:21 -05:00
|
|
|
result = true;
|
|
|
|
m_ts.seek(pos);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true if `c` may be part of an unquoted field, i.e. it is neither a
// newline nor the field separator.
bool CsvParser::isText(QChar c) const
{
    return !isCRLF(c) && !isSeparator(c);
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
// Returns true if every field of `row` is either empty or a single newline.
// A row without any field counts as empty, too.
bool CsvParser::isEmptyRow(CsvRow row) const
{
    for (int i = 0; i < row.size(); ++i) {
        const auto& cell = row.at(i);
        const bool blank = (cell == "\n") || (cell == "");
        if (!blank)
            return false;
    }
    return true;
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
bool CsvParser::isCRLF(const QChar &c) const {
|
2017-01-08 19:33:21 -05:00
|
|
|
return (c == '\n');
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
bool CsvParser::isSpace(const QChar &c) const {
|
2017-03-05 18:47:49 -05:00
|
|
|
return (c == ' ');
|
2017-01-08 19:33:21 -05:00
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
bool CsvParser::isTab(const QChar &c) const {
|
2017-01-08 19:33:21 -05:00
|
|
|
return (c == '\t');
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
bool CsvParser::isSeparator(const QChar &c) const {
|
2017-01-08 19:33:21 -05:00
|
|
|
return (c == m_separator);
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
bool CsvParser::isTerminator(const QChar &c) const {
|
2017-01-08 19:33:21 -05:00
|
|
|
return (isSeparator(c) || (c == '\n') || (c == '\r'));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Selects how qualifiers are escaped inside quoted fields: with a preceding
// backslash (true) or by doubling the qualifier (false).
void CsvParser::setBackslashSyntax(bool set)
{
    m_isBackslashSyntax = set;
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
void CsvParser::setComment(const QChar &c) {
|
2017-01-08 19:33:21 -05:00
|
|
|
m_comment = c.unicode();
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
void CsvParser::setCodec(const QString &s) {
|
2017-01-08 19:33:21 -05:00
|
|
|
m_ts.setCodec(QTextCodec::codecForName(s.toLocal8Bit()));
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
void CsvParser::setFieldSeparator(const QChar &c) {
|
2017-01-08 19:33:21 -05:00
|
|
|
m_separator = c.unicode();
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
void CsvParser::setTextQualifier(const QChar &c) {
|
2017-01-08 19:33:21 -05:00
|
|
|
m_qualifier = c.unicode();
|
|
|
|
}
|
|
|
|
|
|
|
|
int CsvParser::getFileSize() const {
|
|
|
|
return m_csv.size();
|
|
|
|
}
|
|
|
|
|
2017-02-17 19:51:31 -05:00
|
|
|
// Returns a copy of the table produced by the last parse run.
const CsvTable CsvParser::getCsvTable() const
{
    return m_table;
}
|
|
|
|
|
|
|
|
// Returns all status messages collected since the last reset(), one per line.
QString CsvParser::getStatus() const
{
    return m_statusMsg;
}
|
|
|
|
|
|
|
|
int CsvParser::getCsvCols() const {
|
|
|
|
if ((m_table.size() > 0) && (m_table.at(0).size() > 0))
|
|
|
|
return m_table.at(0).size();
|
|
|
|
else return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int CsvParser::getCsvRows() const {
|
|
|
|
return m_table.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-02-21 19:03:22 -05:00
|
|
|
// Appends `s` to the status log, followed by the current row and column and a
// newline.
// A critical message marks the parse run as failed. The failure is sticky: a
// later non-critical message does not clear it, only reset() sets m_isGood
// back to true.
void CsvParser::appendStatusMsg(QString s, bool isCritical) {
    m_statusMsg += s
        .append(": (row,col) " + QString::number(m_currRow))
        .append(",")
        .append(QString::number(m_currCol))
        .append("\n");

    if (isCritical)
        m_isGood = false;
}
|