Improve Discord markdown escaper. Fixes #14

This commit is contained in:
Tulir Asokan 2022-08-20 10:26:03 +03:00
parent 522ac4e2fe
commit 0b84527eab
3 changed files with 96 additions and 12 deletions

View file

@ -91,6 +91,40 @@ func pillConverter(displayname, mxid, eventID string, ctx format.Context) string
return displayname
}
// discordLinkRegex finds the spans of a message that Discord treats as bare links.
//
// Discord links start with http:// or https:// and need at least one more character.
// Everything before the final character must not be < or whitespace, and the final
// character must not be whitespace or any of "'),.:;]
//
// Whitespace here means:
//   - \s and \v: tab, newline, form feed, carriage return, space and vertical tab.
//     Without these a link followed by a line break would swallow the next line,
//     leaving any markdown on it unescaped.
//   - \p{Z}: all Unicode space, line and paragraph separators.
//   - \uFEFF: zero-width whitespace is mostly in the Format category and is allowed,
//     except \uFEFF isn't for some reason.
//
// NOTE(review): this set is intended to mirror JavaScript's \s class, on the assumption
// that Discord's own link pattern is written with \s - confirm against the client.
var discordLinkRegex = regexp.MustCompile(`https?://[^<\s\v\p{Z}\x{feff}]*[^"'),.:;\]\s\v\p{Z}\x{feff}]`)
// discordMarkdownEscaper puts a backslash in front of every character that
// would otherwise be read as Discord markdown syntax: \ _ * ~ ` | <
var discordMarkdownEscaper = func() *strings.Replacer {
	// The backslash comes first so the pair order matches the hand-written list.
	const specials = "\\_*~`|<"
	pairs := make([]string, 0, 2*len(specials))
	for _, char := range specials {
		literal := string(char)
		pairs = append(pairs, literal, `\`+literal)
	}
	return strings.NewReplacer(pairs...)
}()
// escapeDiscordMarkdown backslash-escapes Discord markdown characters in s,
// except inside the spans matched by discordLinkRegex, which are copied
// through untouched.
func escapeDiscordMarkdown(s string) string {
	links := discordLinkRegex.FindAllStringIndex(s, -1)
	if len(links) == 0 {
		// No links at all: the whole string can be escaped in one go.
		return discordMarkdownEscaper.Replace(s)
	}

	// An empty sentinel span at the end of the string lets the loop below
	// also flush the text that follows the last link.
	spans := append(links, []int{len(s), len(s)})

	var out strings.Builder
	cursor := 0
	for _, span := range spans {
		linkStart, linkEnd := span[0], span[1]
		// Text between the previous link and this one gets escaped...
		out.WriteString(discordMarkdownEscaper.Replace(s[cursor:linkStart]))
		// ...while the link itself is written verbatim.
		out.WriteString(s[linkStart:linkEnd])
		cursor = linkEnd
	}
	return out.String()
}
var matrixHTMLParser = &format.HTMLParser{
TabsToSpaces: 4,
Newline: "\n",
@ -102,7 +136,7 @@ var matrixHTMLParser = &format.HTMLParser{
return fmt.Sprintf("__%s__", s)
},
TextConverter: func(s string, context format.Context) string {
return discordMarkdownEscaper.Replace(s)
return escapeDiscordMarkdown(s)
},
SpoilerConverter: func(text, reason string, ctx format.Context) string {
if reason != "" {
@ -116,16 +150,6 @@ func init() {
matrixHTMLParser.PillConverter = pillConverter
}
// discordMarkdownEscaper puts a backslash in front of each of the characters
// \ _ * ~ ` | < and leaves everything else unchanged.
var discordMarkdownEscaper = strings.NewReplacer(
`\`, `\\`,
`_`, `\_`,
`*`, `\*`,
`~`, `\~`,
"`", "\\`",
`|`, `\|`,
`<`, `\<`,
)
func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventContent) string {
if content.Format == event.FormatHTML && len(content.FormattedBody) > 0 {
return matrixHTMLParser.Parse(content.FormattedBody, format.Context{
@ -133,6 +157,6 @@ func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventCon
formatterContextPortalKey: portal,
})
} else {
return discordMarkdownEscaper.Replace(content.Body)
return escapeDiscordMarkdown(content.Body)
}
}

57
formatter_test.go Normal file
View file

@ -0,0 +1,57 @@
// mautrix-discord - A Matrix-Discord puppeting bridge.
// Copyright (C) 2022 Tulir Asokan
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package main
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestEscapeDiscordMarkdown(t *testing.T) {
type escapeTest struct {
name string
input string
expected string
}
tests := []escapeTest{
{"Simple text", "Lorem ipsum dolor sit amet, consectetuer adipiscing elit.", "Lorem ipsum dolor sit amet, consectetuer adipiscing elit."},
{"Backslash", `foo\bar`, `foo\\bar`},
{"Underscore", `foo_bar`, `foo\_bar`},
{"Asterisk", `foo*bar`, `foo\*bar`},
{"Tilde", `foo~bar`, `foo\~bar`},
{"Backtick", "foo`bar", "foo\\`bar"},
{"Forward tick", `foo´bar`, `foo´bar`},
{"Pipe", `foo|bar`, `foo\|bar`},
{"Less than", `foo<bar`, `foo\<bar`},
{"Greater than", `foo>bar`, `foo>bar`},
{"Multiple things", `\_*~|`, `\\\_\*\~\|`},
{"URL", `https://example.com/foo_bar`, `https://example.com/foo_bar`},
{"Multiple URLs", `hello_world https://example.com/foo_bar *testing* https://a_b_c/*def*`, `hello\_world https://example.com/foo_bar \*testing\* https://a_b_c/*def*`},
{"URL ends with no-break zero-width space", "https://example.com\ufefffoo_bar", "https://example.com\ufefffoo\\_bar"},
{"URL ends with less than", `https://example.com<foo_bar`, `https://example.com<foo\_bar`},
{"Short URL", `https://_`, `https://_`},
{"Insecure URL", `http://example.com/foo_bar`, `http://example.com/foo_bar`},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, escapeDiscordMarkdown(test.input))
})
}
}

3
go.mod
View file

@ -9,14 +9,17 @@ require (
github.com/lib/pq v1.10.6
github.com/mattn/go-sqlite3 v1.14.15
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
github.com/stretchr/testify v1.8.0
github.com/yuin/goldmark v1.4.12
maunium.net/go/maulogger/v2 v2.3.2
maunium.net/go/mautrix v0.12.0
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/mattn/go-colorable v0.1.12 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rs/zerolog v1.27.0 // indirect
github.com/tidwall/gjson v1.14.1 // indirect
github.com/tidwall/match v1.1.1 // indirect