diff --git a/formatter.go b/formatter.go
index f3dcef5..d94f35d 100644
--- a/formatter.go
+++ b/formatter.go
@@ -91,6 +91,40 @@ func pillConverter(displayname, mxid, eventID string, ctx format.Context) string
return displayname
}
+// discordLinkRegex approximates Discord's link detection: http:// or https:// followed by one or more characters,
+// none of them a space separator (\p{Zs}) or \uFEFF; < may only be the last one, which is never any of "'),.:;]
+// Other zero-width whitespace (mostly in the Format category) is allowed, unlike \uFEFF.
+// NOTE(review): tab and newline are not in \p{Zs}, so a match can span lines — confirm that is intended.
+var discordLinkRegex = regexp.MustCompile(`https?://[^<\p{Zs}\x{feff}]*[^"'),.:;\]\p{Zs}\x{feff}]`)
+
+var discordMarkdownEscaper = strings.NewReplacer( // puts a backslash in front of each character listed below
+ `\`, `\\`, // the backslash itself, so existing backslashes stay literal
+ `_`, `\_`,
+ `*`, `\*`,
+ `~`, `\~`,
+ "`", "\\`", // double-quoted because a raw string literal cannot contain a backtick
+ `|`, `\|`,
+ `<`, `\<`,
+)
+
+// escapeDiscordMarkdown runs s through discordMarkdownEscaper, except for the spans that
+// discordLinkRegex matches, which are copied to the result unchanged.
+func escapeDiscordMarkdown(s string) string {
+	links := discordLinkRegex.FindAllStringIndex(s, -1)
+	if len(links) == 0 {
+		return discordMarkdownEscaper.Replace(s)
+	}
+	var out strings.Builder
+	cursor := 0 // index in s of the first byte not yet written to out
+	for _, loc := range links {
+		out.WriteString(discordMarkdownEscaper.Replace(s[cursor:loc[0]]))
+		out.WriteString(s[loc[0]:loc[1]])
+		cursor = loc[1]
+	}
+	out.WriteString(discordMarkdownEscaper.Replace(s[cursor:]))
+	return out.String()
+}
+
var matrixHTMLParser = &format.HTMLParser{
TabsToSpaces: 4,
Newline: "\n",
@@ -102,7 +136,7 @@ var matrixHTMLParser = &format.HTMLParser{
return fmt.Sprintf("__%s__", s)
},
TextConverter: func(s string, context format.Context) string {
- return discordMarkdownEscaper.Replace(s)
+ return escapeDiscordMarkdown(s)
},
SpoilerConverter: func(text, reason string, ctx format.Context) string {
if reason != "" {
@@ -116,16 +150,6 @@ func init() {
matrixHTMLParser.PillConverter = pillConverter
}
-var discordMarkdownEscaper = strings.NewReplacer(
- `\`, `\\`,
- `_`, `\_`,
- `*`, `\*`,
- `~`, `\~`,
- "`", "\\`",
- `|`, `\|`,
- `<`, `\<`,
-)
-
func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventContent) string {
if content.Format == event.FormatHTML && len(content.FormattedBody) > 0 {
return matrixHTMLParser.Parse(content.FormattedBody, format.Context{
@@ -133,6 +157,6 @@ func (portal *Portal) parseMatrixHTML(user *User, content *event.MessageEventCon
formatterContextPortalKey: portal,
})
} else {
- return discordMarkdownEscaper.Replace(content.Body)
+ return escapeDiscordMarkdown(content.Body)
}
}
diff --git a/formatter_test.go b/formatter_test.go
new file mode 100644
index 0000000..c05f95b
--- /dev/null
+++ b/formatter_test.go
@@ -0,0 +1,57 @@
+// mautrix-discord - A Matrix-Discord puppeting bridge.
+// Copyright (C) 2022 Tulir Asokan
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+package main
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestEscapeDiscordMarkdown(t *testing.T) {
+ type escapeTest struct {
+ name string
+ input string
+ expected string
+ }
+
+ tests := []escapeTest{
+ {"Simple text", "Lorem ipsum dolor sit amet, consectetuer adipiscing elit.", "Lorem ipsum dolor sit amet, consectetuer adipiscing elit."},
+ {"Backslash", `foo\bar`, `foo\\bar`},
+ {"Underscore", `foo_bar`, `foo\_bar`},
+ {"Asterisk", `foo*bar`, `foo\*bar`},
+ {"Tilde", `foo~bar`, `foo\~bar`},
+ {"Backtick", "foo`bar", "foo\\`bar"},
+ {"Forward tick", `foo´bar`, `foo´bar`},
+ {"Pipe", `foo|bar`, `foo\|bar`},
+ {"Less than", `foobar`, `foo>bar`},
+ {"Multiple things", `\_*~|`, `\\\_\*\~\|`},
+ {"URL", `https://example.com/foo_bar`, `https://example.com/foo_bar`},
+ {"Multiple URLs", `hello_world https://example.com/foo_bar *testing* https://a_b_c/*def*`, `hello\_world https://example.com/foo_bar \*testing\* https://a_b_c/*def*`},
+ {"URL ends with no-break zero-width space", "https://example.com\ufefffoo_bar", "https://example.com\ufefffoo\\_bar"},
+ {"URL ends with less than", `https://example.com