mirror of
https://github.com/NullHypothesis/censorbib.git
synced 2025-07-23 06:31:22 -04:00
Support accent characters in cite names.
A new commit introduces a cite name that contains an accented character: @inproceedings{Müller2024a, This causes the bibtex parser to fail with the following two errors: "failed to extract cite name of: @inproceedings{Müller2024a," and "parse failed at 55:17: syntax error: unexpected $end, expecting tCOMMA". The second error is a limitation of the upstream parser; I will try to get a fix merged upstream: https://github.com/nickng/bibtex.
This commit is contained in:
parent
863792024d
commit
7a1e73c11d
2 changed files with 14 additions and 3 deletions
|
@ -13,8 +13,9 @@ import (
|
||||||
"github.com/nickng/bibtex"
|
"github.com/nickng/bibtex"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Matches e.g.: @inproceedings{Doe2024a,
|
// Matches e.g.: @inproceedings{Müller2024a,
|
||||||
var re = regexp.MustCompile(`@[a-z]*\{([A-Za-z\-]*[0-9]{4}[a-z]),`)
|
// \p{L} matches any Unicode letter and \p{M} any combining mark, so accented characters are accepted in both precomposed and decomposed form.
|
||||||
|
var re = regexp.MustCompile(`@[a-z]*\{([\"\p{L}\p{M}\-]*[0-9]{4}[a-z]),`)
|
||||||
|
|
||||||
// Map a cite name (e.g., Doe2024a) to its line number in the .bib file. All
|
// Map a cite name (e.g., Doe2024a) to its line number in the .bib file. All
|
||||||
// cite names are unique.
|
// cite names are unique.
|
||||||
|
|
12
src/vendor/github.com/nickng/bibtex/token.go
generated
vendored
12
src/vendor/github.com/nickng/bibtex/token.go
generated
vendored
|
@ -29,8 +29,18 @@ func isWhitespace(ch rune) bool {
|
||||||
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isAccent reports whether ch is a non-ASCII Latin letter, such as the
// accented characters that appear in author names (ä, é, ñ, ø, č, ...).
//
// It accepts every letter in the Latin-1 Supplement (U+00C0-U+00FF) and in
// Latin Extended-A/B (U+0100-U+024F). This is a superset of the fixed list of
// accented characters that was accepted before, so cite names that used to
// lex keep lexing, and names with other Latin diacritics no longer fail.
// ASCII letters are not matched here; isAlpha handles them separately.
// Letters from other scripts and standalone combining marks are not covered.
func isAccent(ch rune) bool {
	switch {
	case ch == '\u00D7' || ch == '\u00F7':
		// The multiplication and division signs sit inside the Latin-1
		// letter range but are symbols, not letters.
		return false
	case '\u00C0' <= ch && ch <= '\u00FF':
		// Latin-1 Supplement letters: À .. ÿ.
		return true
	case '\u0100' <= ch && ch <= '\u024F':
		// Latin Extended-A and Extended-B: Ā .. ɏ.
		return true
	}
	return false
}
|
||||||
|
|
||||||
func isAlpha(ch rune) bool {
|
func isAlpha(ch rune) bool {
|
||||||
return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
|
return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || isAccent(ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isDigit(ch rune) bool {
|
func isDigit(ch rune) bool {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue