url_extractor.go 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. package chat
  2. import (
  3. "strings"
  4. "github.com/diamondburned/arikawa/v3/discord"
  5. "github.com/diamondburned/ningen/v3/discordmd"
  6. "github.com/yuin/goldmark/ast"
  7. "github.com/yuin/goldmark/parser"
  8. "github.com/yuin/goldmark/text"
  9. )
  10. func extractURLs(content string) []string {
  11. src := []byte(content)
  12. node := parser.NewParser(
  13. parser.WithBlockParsers(discordmd.BlockParsers()...),
  14. parser.WithInlineParsers(discordmd.InlineParserWithLink()...),
  15. ).Parse(text.NewReader(src))
  16. var urls []string
  17. ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
  18. if entering {
  19. switch n := n.(type) {
  20. case *ast.AutoLink:
  21. urls = append(urls, string(n.URL(src)))
  22. case *ast.Link:
  23. urls = append(urls, string(n.Destination))
  24. }
  25. }
  26. return ast.WalkContinue, nil
  27. })
  28. return urls
  29. }
  30. func extractEmbedURLs(embeds []discord.Embed) []string {
  31. urls := make([]string, 0, len(embeds)*3)
  32. for _, embed := range embeds {
  33. if embed.URL != "" {
  34. urls = append(urls, string(embed.URL))
  35. }
  36. if embed.Image != nil && embed.Image.URL != "" {
  37. urls = append(urls, string(embed.Image.URL))
  38. }
  39. if embed.Video != nil && embed.Video.URL != "" {
  40. urls = append(urls, string(embed.Video.URL))
  41. }
  42. }
  43. return urls
  44. }
  45. func messageURLs(msg discord.Message) []string {
  46. combined := extractURLs(msg.Content)
  47. combined = append(combined, extractEmbedURLs(msg.Embeds)...)
  48. urls := make([]string, 0, len(combined))
  49. seen := make(map[string]struct{}, len(combined))
  50. for _, u := range combined {
  51. u = strings.TrimSpace(u)
  52. if u == "" {
  53. continue
  54. }
  55. if _, ok := seen[u]; ok {
  56. continue
  57. }
  58. seen[u] = struct{}{}
  59. urls = append(urls, u)
  60. }
  61. return urls
  62. }