mirror of
https://github.com/NullHypothesis/censorbib.git
synced 2025-07-22 06:09:10 -04:00
Support accent characters in cite names.
A new commit introduces a cite name that contains an accented character: @inproceedings{Müller2024a, This causes the bibtex parser to fail with the following two errors: "failed to extract cite name of: @inproceedings{Müller2024a," and "parse failed at 55:17: syntax error: unexpected $end, expecting tCOMM". The second error is an upstream limitation; I will try to get a fix for it merged upstream: https://github.com/nickng/bibtex.
This commit is contained in:
parent
863792024d
commit
7a1e73c11d
2 changed files with 14 additions and 3 deletions
|
@ -13,8 +13,9 @@ import (
|
|||
"github.com/nickng/bibtex"
|
||||
)
|
||||
|
||||
// re extracts the cite name from the first line of a BibTeX entry, e.g.
// "Doe2024a" from "@inproceedings{Doe2024a," or "Müller2024a" from
// "@inproceedings{Müller2024a,".
//
// The captured name is any run of Unicode letters (\p{L}), combining marks
// (\p{M}), double quotes, and hyphens, followed by a four-digit year and a
// single lowercase ASCII letter.
var re = regexp.MustCompile(`@[a-z]*\{([\"\p{L}\p{M}\-]*[0-9]{4}[a-z]),`)
|
||||
|
||||
// Map a cite name (e.g., Doe2024a) to its line number in the .bib file. All
|
||||
// cite names are unique.
|
||||
|
|
12
src/vendor/github.com/nickng/bibtex/token.go
generated
vendored
12
src/vendor/github.com/nickng/bibtex/token.go
generated
vendored
|
@ -29,8 +29,18 @@ func isWhitespace(ch rune) bool {
|
|||
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
||||
}
|
||||
|
||||
// isAccent reports whether ch is a non-ASCII Latin letter, i.e. a letter
// carrying a diacritic or a Latin special letter such as 'ß' or 'ø'.
//
// It accepts every letter in the Latin-1 Supplement, Latin Extended-A,
// Latin Extended-B and Latin Extended Additional blocks. This is a superset
// of the previously hard-coded list, so names such as "Müller", "Dvořák" or
// "Nguyễn" are all recognized. Code-point ranges are used instead of the
// unicode package so that no new import is required.
func isAccent(ch rune) bool {
	switch {
	case ch == 0x00D7 || ch == 0x00F7:
		// '×' and '÷' sit inside the Latin-1 letter range but are math signs.
		return false
	case 0x00C0 <= ch && ch <= 0x00FF:
		// Latin-1 Supplement letters: À..ÿ.
		return true
	case 0x0100 <= ch && ch <= 0x024F:
		// Latin Extended-A and Latin Extended-B.
		return true
	case 0x1E00 <= ch && ch <= 0x1EFF:
		// Latin Extended Additional (e.g. Vietnamese).
		return true
	}
	return false
}
|
||||
|
||||
func isAlpha(ch rune) bool {
|
||||
return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
|
||||
return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || isAccent(ch)
|
||||
}
|
||||
|
||||
func isDigit(ch rune) bool {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue