Support accented characters in cite names.

A new commit introduces a cite name that contains an accented character:
@inproceedings{Müller2024a,

This causes the bibtex parser to fail with the following two errors:

failed to extract cite name of: @inproceedings{Müller2024a,

and

parse failed at 55:17: syntax error: unexpected $end, expecting tCOMM

The second error is caused by an upstream limitation;
I will try to get the fix merged upstream: https://github.com/nickng/bibtex.
This commit is contained in:
gfw-report 2024-07-20 00:00:00 +00:00
parent 863792024d
commit 7a1e73c11d
2 changed files with 14 additions and 3 deletions

View file

@ -13,8 +13,9 @@ import (
"github.com/nickng/bibtex"
)
// re extracts the cite name from the opening line of a BibTeX entry. For
// example, it captures "Müller2024a" from: @inproceedings{Müller2024a,
//
// The captured cite name is a (possibly empty) run of Unicode letters (\p{L}),
// combining marks (\p{M}), double quotes, and hyphens, followed by a
// four-digit year and a single lowercase ASCII letter. \p{L} covers
// precomposed accented letters such as "ü"; \p{M} covers the decomposed form
// in which the accent is a separate combining code point.
var re = regexp.MustCompile(`@[a-z]*\{([\"\p{L}\p{M}\-]*[0-9]{4}[a-z]),`)
// Map a cite name (e.g., Doe2024a) to its line number in the .bib file. All
// cite names are unique.

View file

@ -29,8 +29,18 @@ func isWhitespace(ch rune) bool {
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
}
// isAccent reports whether ch is one of a fixed, hard-coded set of accented
// Latin letters (plus the German sharp s). Any rune outside that set,
// including plain ASCII letters, yields false.
func isAccent(ch rune) bool {
	const accents = "äöüßéêçñÁÉÍÓÚáéíóúàèìòùâêîôûãõñÄÖÜ"

	found := false
	for _, candidate := range accents {
		if candidate == ch {
			found = true
			break
		}
	}
	return found
}
// isAlpha reports whether ch is an ASCII letter ('a'-'z' or 'A'-'Z') or one
// of the accented characters recognized by isAccent.
func isAlpha(ch rune) bool {
	return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || isAccent(ch)
}
func isDigit(ch rune) bool {