2023-05-28 11:44:21 -04:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2023 Kyle Reed
|
|
|
|
*
|
|
|
|
* This file is part of PortaPack.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
* any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; see the file COPYING. If not, write to
|
|
|
|
* the Free Software Foundation, Inc., 51 Franklin Street,
|
|
|
|
* Boston, MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __FILE_WRAPPER_HPP__
|
|
|
|
#define __FILE_WRAPPER_HPP__
|
|
|
|
|
|
|
|
#include "circular_buffer.hpp"
|
|
|
|
#include "file.hpp"
|
|
|
|
#include "optional.hpp"
|
|
|
|
|
|
|
|
#include <memory>
|
2023-06-01 18:45:55 -04:00
|
|
|
#include <string_view>
|
2023-05-28 11:44:21 -04:00
|
|
|
|
|
|
|
/* Line terminator convention used by a buffer. */
enum class LineEnding : uint8_t {
    /* Lines end with a single '\n'. */
    LF = 0,
    /* Lines end with "\r\n". */
    CRLF = 1
};
|
|
|
|
|
|
|
|
/* TODO:
|
|
|
|
* - CRLF handling.
|
2023-06-01 18:45:55 -04:00
|
|
|
* - Avoid full re-read on edits.
|
|
|
|
* - Would need to read old/new text when editing to track newlines.
|
|
|
|
* - How to surface errors? Exceptions?
|
2023-05-28 11:44:21 -04:00
|
|
|
*/
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* FatFs docs http://elm-chan.org/fsw/ff/00index_e.html */
|
|
|
|
|
2023-05-28 11:44:21 -04:00
|
|
|
/* BufferType requires the following members
|
|
|
|
* Size size()
|
|
|
|
* Result<Size> read(void* data, Size bytes_to_read)
|
2023-06-01 18:45:55 -04:00
|
|
|
* Result<Size> write(const void* data, Size bytes_to_write)
|
2023-05-28 11:44:21 -04:00
|
|
|
* Result<Offset> seek(uint32_t offset)
|
2023-06-01 18:45:55 -04:00
|
|
|
* Result<Offset> truncate()
|
|
|
|
* Optional<Error> sync()
|
2023-05-28 11:44:21 -04:00
|
|
|
*/
|
|
|
|
|
|
|
|
/* Wraps a buffer and provides an API for accessing lines efficiently. */
|
|
|
|
template <typename BufferType, uint32_t CacheSize>
|
|
|
|
class BufferWrapper {
|
|
|
|
public:
|
|
|
|
using Offset = uint32_t;
|
|
|
|
using Line = uint32_t;
|
|
|
|
using Column = uint32_t;
|
2023-07-11 16:48:36 -04:00
|
|
|
using Size = File::Size;
|
2023-05-28 11:44:21 -04:00
|
|
|
using Range = struct {
|
2023-06-01 18:45:55 -04:00
|
|
|
// Offset of the start, inclusive.
|
2023-05-28 11:44:21 -04:00
|
|
|
Offset start;
|
2023-06-01 18:45:55 -04:00
|
|
|
// Offset of the end, exclusive.
|
2023-05-28 11:44:21 -04:00
|
|
|
Offset end;
|
2023-06-01 18:45:55 -04:00
|
|
|
|
|
|
|
Offset length() const { return end - start; }
|
2023-05-28 11:44:21 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
BufferWrapper(BufferType* buffer)
|
|
|
|
: wrapped_{buffer} {
|
|
|
|
initialize();
|
|
|
|
}
|
|
|
|
virtual ~BufferWrapper() {}
|
|
|
|
|
|
|
|
/* Prevent copies */
|
|
|
|
BufferWrapper(const BufferWrapper&) = delete;
|
|
|
|
BufferWrapper& operator=(const BufferWrapper&) = delete;
|
|
|
|
|
|
|
|
Optional<std::string> get_text(Line line, Column col, Offset length) {
|
2023-06-01 18:45:55 -04:00
|
|
|
std::string buffer;
|
|
|
|
buffer.resize(length);
|
|
|
|
|
|
|
|
auto result = get_text(line, col, &buffer[0], length);
|
|
|
|
if (!result)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
buffer.resize(*result);
|
|
|
|
return buffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
Optional<Offset> get_text(Line line, Column col, char* output, Offset length) {
|
2023-05-28 11:44:21 -04:00
|
|
|
auto range = line_range(line);
|
|
|
|
int32_t to_read = length;
|
|
|
|
|
|
|
|
if (!range)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
// Don't read past end of line.
|
|
|
|
if (range->start + col + to_read >= range->end)
|
|
|
|
to_read = range->end - col - range->start;
|
|
|
|
|
|
|
|
if (to_read <= 0)
|
|
|
|
return {};
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
return read(range->start + col, output, to_read);
|
2023-05-28 11:44:21 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Gets the size of the buffer in bytes. */
|
2023-07-11 16:48:36 -04:00
|
|
|
Size size() const { return wrapped_->size(); }
|
2023-05-28 11:44:21 -04:00
|
|
|
|
|
|
|
/* Get the count of the lines in the buffer. */
|
|
|
|
uint32_t line_count() const { return line_count_; }
|
|
|
|
|
|
|
|
/* Gets the range of the line if valid. */
|
|
|
|
Optional<Range> line_range(Line line) {
|
|
|
|
ensure_cached(line);
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
auto index = index_for_line(line);
|
|
|
|
if (!index)
|
2023-05-28 11:44:21 -04:00
|
|
|
return {};
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
auto start = *index == 0 ? start_offset_ : (newlines_[*index - 1] + 1);
|
|
|
|
auto end = newlines_[*index] + 1;
|
2023-05-28 11:44:21 -04:00
|
|
|
|
|
|
|
return Range{start, end};
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Gets the length of the line, or 0 if invalid. */
|
|
|
|
Offset line_length(Line line) {
|
|
|
|
auto range = line_range(line);
|
2023-06-01 18:45:55 -04:00
|
|
|
return range ? range->length() : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Gets the buffer offset of the line & col if valid. */
|
|
|
|
Optional<Offset> get_offset(Line line, Column col) {
|
|
|
|
auto range = line_range(line);
|
2023-05-28 11:44:21 -04:00
|
|
|
|
|
|
|
if (range)
|
2023-06-01 18:45:55 -04:00
|
|
|
return range->start + col;
|
2023-05-28 11:44:21 -04:00
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
return {};
|
2023-05-28 11:44:21 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Gets the index of the first line in the cache.
|
|
|
|
* Only really useful for unit testing or diagnostics. */
|
|
|
|
Offset start_line() { return start_line_; };
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* Inserts a line before the specified line or at the
|
|
|
|
* end of the buffer if line >= line_count. */
|
|
|
|
void insert_line(Line line) {
|
|
|
|
auto range = line_range(line);
|
|
|
|
|
|
|
|
if (range)
|
|
|
|
replace_range({range->start, range->start}, "\n");
|
|
|
|
else if (line >= line_count_)
|
|
|
|
replace_range({(Offset)size(), (Offset)size()}, "\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Deletes the specified line. */
|
|
|
|
void delete_line(Line line) {
|
|
|
|
auto range = line_range(line);
|
|
|
|
|
|
|
|
if (range)
|
|
|
|
replace_range(*range, {});
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Replace the specified range with the string contents.
|
|
|
|
* A range with start/end set to the same value will insert.
|
|
|
|
* A range with an empty string will delete. */
|
|
|
|
void replace_range(Range range, std::string_view value) {
|
|
|
|
if (range.start > size() || range.end > size() || range.start > range.end)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* If delta_length == 0, it's an overwrite. Could still have
|
|
|
|
* added or removed newlines so caches will need to be rebuilt.
|
|
|
|
* If delta_length > 0, the file needs to grow and content needs
|
|
|
|
* to be shifted forward until the end of the range.
|
|
|
|
* If delta_length < 0, the file needs to be truncated and the
|
|
|
|
* content after the value needs to be shifted backward. */
|
|
|
|
int32_t delta_length = value.length() - range.length();
|
|
|
|
if (delta_length > 0)
|
|
|
|
expand(range.end, delta_length);
|
|
|
|
else if (delta_length < 0)
|
|
|
|
shrink(range.end, delta_length);
|
|
|
|
|
|
|
|
write(range.start, value);
|
|
|
|
wrapped_->sync();
|
|
|
|
rebuild_cache();
|
|
|
|
}
|
|
|
|
|
2023-05-28 11:44:21 -04:00
|
|
|
protected:
|
|
|
|
BufferWrapper() {}
|
|
|
|
|
|
|
|
void set_buffer(BufferType* buffer) {
|
|
|
|
wrapped_ = buffer;
|
|
|
|
initialize();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
/* Number of newline offsets to cache. */
|
|
|
|
static constexpr Offset max_newlines = CacheSize;
|
|
|
|
|
|
|
|
/* Size of stack buffer used for reading/writing. */
|
2023-06-01 18:45:55 -04:00
|
|
|
static constexpr Offset buffer_size = 512;
|
2023-05-28 11:44:21 -04:00
|
|
|
|
|
|
|
void initialize() {
|
|
|
|
start_offset_ = 0;
|
|
|
|
start_line_ = 0;
|
|
|
|
line_count_ = 0;
|
2023-06-01 18:45:55 -04:00
|
|
|
rebuild_cache();
|
|
|
|
}
|
|
|
|
|
|
|
|
void rebuild_cache() {
|
2023-05-28 11:44:21 -04:00
|
|
|
newlines_.clear();
|
|
|
|
|
|
|
|
// Special case for empty files to keep them consistent.
|
|
|
|
if (size() == 0) {
|
|
|
|
line_count_ = 1;
|
|
|
|
newlines_.push_back(0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
// TODO: think through this for edit cases.
|
|
|
|
// E.g. don't read to end, maybe could specify
|
|
|
|
// a range to re-read because it should be possible
|
|
|
|
// to tell where the dirty regions are. After the
|
|
|
|
// dirty region, it should be possible to fixup
|
|
|
|
// the line_count data.
|
|
|
|
// TODO: seems like shrink/expand could do this while
|
|
|
|
// they are running.
|
|
|
|
|
|
|
|
line_count_ = start_line_;
|
|
|
|
Offset offset = start_offset_;
|
2023-05-28 11:44:21 -04:00
|
|
|
auto result = next_newline(offset);
|
|
|
|
|
|
|
|
while (result) {
|
|
|
|
++line_count_;
|
|
|
|
if (newlines_.size() < max_newlines)
|
|
|
|
newlines_.push_back(*result);
|
|
|
|
offset = *result + 1;
|
|
|
|
result = next_newline(offset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
Optional<Offset> read(Offset offset, char* buffer, Offset length) {
|
2023-05-28 11:44:21 -04:00
|
|
|
if (offset + length > size())
|
|
|
|
return {};
|
|
|
|
|
|
|
|
wrapped_->seek(offset);
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
auto result = wrapped_->read(buffer, length);
|
2023-05-28 11:44:21 -04:00
|
|
|
if (result.is_error())
|
2023-06-01 18:45:55 -04:00
|
|
|
return {};
|
2023-05-28 11:44:21 -04:00
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
return *result;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool write(Offset offset, std::string_view value) {
|
|
|
|
wrapped_->seek(offset);
|
|
|
|
auto result = wrapped_->write(value.data(), value.length());
|
|
|
|
|
|
|
|
return result.is_ok();
|
2023-05-28 11:44:21 -04:00
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* Returns the index of the line in the newline cache if valid. */
|
|
|
|
Optional<Offset> index_for_line(Line line) const {
|
2023-05-28 11:44:21 -04:00
|
|
|
if (line >= line_count_)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
Offset actual = line - start_line_;
|
|
|
|
if (actual >= newlines_.size()) // NB: underflow wrap.
|
|
|
|
return {};
|
|
|
|
|
|
|
|
return actual;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ensure specified line is in the newline cache. */
|
|
|
|
void ensure_cached(Line line) {
|
|
|
|
if (line >= line_count_)
|
|
|
|
return;
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
auto index = index_for_line(line);
|
|
|
|
if (index)
|
2023-05-28 11:44:21 -04:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (line < start_line_) {
|
|
|
|
while (line < start_line_ && start_offset_ >= 2) {
|
|
|
|
// start_offset_ - 1 should be a newline. Need to
|
|
|
|
// find the new value for start_offset_. start_line_
|
|
|
|
// has to be > 0 to get into this block so there should
|
|
|
|
// always be one newline before start_offset_.
|
|
|
|
auto offset = previous_newline(start_offset_ - 2);
|
|
|
|
newlines_.push_front(start_offset_ - 1);
|
|
|
|
|
|
|
|
if (!offset) {
|
|
|
|
// Must be at beginning.
|
|
|
|
start_line_ = 0;
|
|
|
|
start_offset_ = 0;
|
|
|
|
} else {
|
|
|
|
// Found an previous newline, the new start_line_
|
|
|
|
// starts at the newline offset + 1.
|
|
|
|
start_line_--;
|
|
|
|
start_offset_ = *offset + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
while (line >= start_line_ + newlines_.size()) {
|
|
|
|
auto offset = next_newline(newlines_.back() + 1);
|
|
|
|
if (offset) {
|
|
|
|
start_line_++;
|
|
|
|
start_offset_ = newlines_.front() + 1;
|
|
|
|
newlines_.push_back(*offset);
|
|
|
|
} /* else at the EOF. */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* Finding the first newline backward from offset. */
|
2023-05-28 11:44:21 -04:00
|
|
|
Optional<Offset> previous_newline(Offset offset) {
|
|
|
|
char buffer[buffer_size];
|
|
|
|
auto to_read = buffer_size;
|
|
|
|
|
|
|
|
do {
|
|
|
|
if (offset < to_read) {
|
|
|
|
// NB: Char at 'offset' was read in the previous iteration.
|
|
|
|
to_read = offset;
|
|
|
|
offset = 0;
|
|
|
|
} else
|
|
|
|
offset -= to_read;
|
|
|
|
|
|
|
|
wrapped_->seek(offset);
|
|
|
|
|
|
|
|
auto result = wrapped_->read(buffer, to_read);
|
|
|
|
if (result.is_error())
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Find newlines in the buffer backwards.
|
|
|
|
for (int32_t i = *result - 1; i >= 0; --i) {
|
|
|
|
switch (buffer[i]) {
|
|
|
|
case '\n':
|
|
|
|
return offset + i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (offset == 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
} while (true);
|
|
|
|
|
|
|
|
return {}; // Didn't find one.
|
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* Finding the first newline forward from offset. */
|
2023-05-28 11:44:21 -04:00
|
|
|
Optional<Offset> next_newline(Offset offset) {
|
|
|
|
// EOF, no more newlines to find.
|
|
|
|
if (offset >= size())
|
|
|
|
return {};
|
|
|
|
|
|
|
|
char buffer[buffer_size];
|
|
|
|
wrapped_->seek(offset);
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
auto result = wrapped_->read(buffer, buffer_size);
|
|
|
|
if (result.is_error())
|
|
|
|
return {};
|
|
|
|
|
|
|
|
// Find newlines in the buffer.
|
|
|
|
for (Offset i = 0; i < *result; ++i) {
|
|
|
|
switch (buffer[i]) {
|
|
|
|
case '\n':
|
|
|
|
return offset + i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
offset += *result;
|
|
|
|
|
|
|
|
if (*result < buffer_size)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// For consistency, treat the end of the file as a "newline".
|
|
|
|
return size() - 1;
|
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* Grow the file and move file content so that the
|
|
|
|
* content at src is shifted forward by 'delta'. */
|
|
|
|
void expand(Offset src, int32_t delta) {
|
|
|
|
if (delta <= 0) // Not an expand.
|
|
|
|
return;
|
|
|
|
|
|
|
|
char buffer[buffer_size];
|
|
|
|
auto to_read = buffer_size;
|
|
|
|
|
|
|
|
// Number of bytes left to shift.
|
|
|
|
Offset remaining = size() - src;
|
|
|
|
Offset offset = size();
|
|
|
|
|
|
|
|
while (remaining > 0) {
|
|
|
|
offset -= std::min(remaining, buffer_size);
|
|
|
|
to_read = std::min(remaining, buffer_size);
|
|
|
|
|
|
|
|
wrapped_->seek(offset);
|
|
|
|
auto result = wrapped_->read(buffer, to_read);
|
|
|
|
if (result.is_error())
|
|
|
|
break;
|
|
|
|
|
|
|
|
wrapped_->seek(offset + delta);
|
|
|
|
result = wrapped_->write(buffer, *result);
|
|
|
|
if (result.is_error())
|
|
|
|
break;
|
|
|
|
|
|
|
|
remaining -= *result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Shrink the file and move file content so that the
|
|
|
|
* content at src is shifted backward by 'delta'. */
|
|
|
|
void shrink(Offset src, int32_t delta) {
|
|
|
|
if (delta >= 0) // Not a shrink.
|
|
|
|
return;
|
|
|
|
|
|
|
|
char buffer[buffer_size];
|
|
|
|
auto offset = src;
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
wrapped_->seek(offset);
|
|
|
|
auto result = wrapped_->read(buffer, buffer_size);
|
|
|
|
if (result.is_error())
|
|
|
|
break;
|
|
|
|
|
|
|
|
wrapped_->seek(offset + delta);
|
|
|
|
result = wrapped_->write(buffer, *result);
|
|
|
|
|
|
|
|
if (result.is_error() || *result < buffer_size)
|
|
|
|
break;
|
|
|
|
|
|
|
|
offset += *result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete the extra bytes at the end of the file.
|
|
|
|
wrapped_->truncate();
|
|
|
|
}
|
|
|
|
|
2023-05-28 11:44:21 -04:00
|
|
|
BufferType* wrapped_{};
|
|
|
|
|
|
|
|
/* Total number of lines in the buffer. */
|
|
|
|
Offset line_count_{0};
|
|
|
|
|
|
|
|
/* The offset and line of the newlines cache. */
|
|
|
|
Offset start_offset_{0};
|
|
|
|
Offset start_line_{0};
|
|
|
|
|
|
|
|
LineEnding line_ending_{LineEnding::LF};
|
|
|
|
CircularBuffer<Offset, max_newlines + 1> newlines_{};
|
|
|
|
};
|
|
|
|
|
|
|
|
/* A BufferWrapper over a file. */
|
|
|
|
class FileWrapper : public BufferWrapper<File, 64> {
|
|
|
|
public:
|
|
|
|
template <typename T>
|
|
|
|
using Result = File::Result<T>;
|
|
|
|
using Error = File::Error;
|
2023-07-17 14:43:37 -04:00
|
|
|
static Result<std::unique_ptr<FileWrapper>> open(const std::filesystem::path& path, bool create = false) {
|
2023-05-28 11:44:21 -04:00
|
|
|
auto fw = std::unique_ptr<FileWrapper>(new FileWrapper());
|
2023-07-17 14:43:37 -04:00
|
|
|
auto error = fw->file_.open(path, /*read_only*/ false, create);
|
2023-05-28 11:44:21 -04:00
|
|
|
|
|
|
|
if (error)
|
|
|
|
return *error;
|
|
|
|
|
|
|
|
fw->initialize();
|
|
|
|
return fw;
|
|
|
|
}
|
|
|
|
|
2023-06-01 18:45:55 -04:00
|
|
|
/* Underlying file. */
|
|
|
|
File& file() { return file_; }
|
|
|
|
|
|
|
|
/* Swaps out the underlying file for the specified file.
|
|
|
|
* The swapped file is expected have the same contents.
|
|
|
|
* For copy-on-write scenario with a temp file. */
|
|
|
|
bool assume_file(const std::filesystem::path& path) {
|
|
|
|
File file;
|
|
|
|
auto error = file.open(path, /*read_only*/ false);
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
file_ = std::move(file);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2023-05-28 11:44:21 -04:00
|
|
|
private:
|
|
|
|
FileWrapper() {}
|
|
|
|
void initialize() {
|
|
|
|
set_buffer(&file_);
|
|
|
|
}
|
|
|
|
|
|
|
|
File file_{};
|
|
|
|
};
|
|
|
|
|
|
|
|
template <uint32_t CacheSize = 64, typename T>
|
|
|
|
BufferWrapper<T, CacheSize> wrap_buffer(T& buffer) {
|
|
|
|
return {&buffer};
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // __FILE_WRAPPER_HPP__
|