Skip to content

Commit

Permalink
RF: social tags checker (#39)
Browse files Browse the repository at this point in the history
* RF: social tags checker

* Add some tests
  • Loading branch information
kynrai committed Jun 11, 2024
1 parent e51e0ee commit b00c48a
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 176 deletions.
10 changes: 6 additions & 4 deletions checks/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@ import (
)

// Checks holds one pointer per available checker (Carbon, Rank, SocialTags).
// NOTE(review): this listing is a rendered diff with removed and added lines
// interleaved, which is why Carbon and Rank each appear twice below.
type Checks struct {
Carbon *Carbon
Rank *Rank
Carbon *Carbon
Rank *Rank
SocialTags *SocialTags
}

// NewChecks builds a Checks whose checkers are all constructed with the same
// http.Client, configured with a five-second timeout.
// NOTE(review): this listing is a rendered diff with removed and added lines
// interleaved, which is why the Carbon and Rank initialisers appear twice.
func NewChecks() *Checks {
client := &http.Client{
Timeout: 5 * time.Second,
}
return &Checks{
Carbon: NewCarbon(client),
Rank: NewRank(client),
Carbon: NewCarbon(client),
Rank: NewRank(client),
SocialTags: NewSocialTags(client),
}
}
94 changes: 94 additions & 0 deletions checks/social_tags.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package checks

import (
	"context"
	"io"
	"net/http"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// SocialTagsData is the JSON-serialisable set of page metadata collected by
// GetSocialTags. Every field is a plain string; a tag that is absent from the
// page is left as the empty string.
type SocialTagsData struct {
	// General document metadata.
	Title        string `json:"title"`
	Description  string `json:"description"`
	Keywords     string `json:"keywords"`
	CanonicalUrl string `json:"canonicalUrl"`

	// Open Graph (meta property="og:*") values.
	OgTitle       string `json:"ogTitle"`
	OgType        string `json:"ogType"`
	OgImage       string `json:"ogImage"`
	OgUrl         string `json:"ogUrl"`
	OgDescription string `json:"ogDescription"`
	OgSiteName    string `json:"ogSiteName"`

	// Twitter card (meta name="twitter:*") values.
	TwitterCard        string `json:"twitterCard"`
	TwitterSite        string `json:"twitterSite"`
	TwitterCreator     string `json:"twitterCreator"`
	TwitterTitle       string `json:"twitterTitle"`
	TwitterDescription string `json:"twitterDescription"`
	TwitterImage       string `json:"twitterImage"`

	// Remaining meta and link values.
	ThemeColor string `json:"themeColor"`
	Robots     string `json:"robots"`
	Googlebot  string `json:"googlebot"`
	Generator  string `json:"generator"`
	Viewport   string `json:"viewport"`
	Author     string `json:"author"`
	Publisher  string `json:"publisher"`
	Favicon    string `json:"favicon"`
}

// Empty reports whether s equals the zero SocialTagsData, i.e. whether every
// field is the empty string.
func (s SocialTagsData) Empty() bool {
	var zero SocialTagsData
	return s == zero
}

// SocialTags is the checker that retrieves a page and reads its social/meta
// tags. It performs its requests through the wrapped HTTP client.
type SocialTags struct {
	client *http.Client
}

// NewSocialTags returns a new SocialTags checker that issues its requests
// with client.
func NewSocialTags(client *http.Client) *SocialTags {
	checker := new(SocialTags)
	checker.client = client
	return checker
}

// maxSocialTagsBodyBytes caps how much of a response body GetSocialTags feeds
// to the HTML parser. The target URL is caller-supplied, so an unbounded read
// would let a single huge (or endless) response exhaust memory. The tags of
// interest live in <head>, far below this limit.
const maxSocialTagsBodyBytes = 10 << 20 // 10 MiB

// GetSocialTags issues a GET request for url using the checker's HTTP client
// (honouring ctx for cancellation), parses the response body as HTML and
// returns the social/meta tags found in it.
//
// Tags that are missing from the document are returned as empty strings; use
// SocialTagsData.Empty to detect a page with no recognised metadata at all.
// An error is returned when the request cannot be built or sent, or when the
// body cannot be parsed; in that case the returned data is nil.
//
// NOTE(review): the HTTP status code is not inspected, so error pages are
// parsed like any other document.
func (s *SocialTags) GetSocialTags(ctx context.Context, url string) (*SocialTagsData, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := s.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Parse at most maxSocialTagsBodyBytes of the body; once the limit is
	// reached the reader reports EOF and parsing ends with what was read.
	doc, err := goquery.NewDocumentFromReader(io.LimitReader(resp.Body, maxSocialTagsBodyBytes))
	if err != nil {
		return nil, err
	}

	// meta and link read the "content" / "href" attribute of the first element
	// matching selector, or "" when nothing matches.
	meta := func(selector string) string {
		return doc.Find(selector).AttrOr("content", "")
	}
	link := func(selector string) string {
		return doc.Find(selector).AttrOr("href", "")
	}

	// Text() concatenates the text of every matched element, so restrict the
	// title to the first match and strip the surrounding whitespace that
	// pretty-printed markup leaves around it.
	title := strings.TrimSpace(doc.Find("head title").First().Text())

	return &SocialTagsData{
		Title:              title,
		Description:        meta("meta[name='description']"),
		Keywords:           meta("meta[name='keywords']"),
		CanonicalUrl:       link("link[rel='canonical']"),
		OgTitle:            meta("meta[property='og:title']"),
		OgType:             meta("meta[property='og:type']"),
		OgImage:            meta("meta[property='og:image']"),
		OgUrl:              meta("meta[property='og:url']"),
		OgDescription:      meta("meta[property='og:description']"),
		OgSiteName:         meta("meta[property='og:site_name']"),
		TwitterCard:        meta("meta[name='twitter:card']"),
		TwitterSite:        meta("meta[name='twitter:site']"),
		TwitterCreator:     meta("meta[name='twitter:creator']"),
		TwitterTitle:       meta("meta[name='twitter:title']"),
		TwitterDescription: meta("meta[name='twitter:description']"),
		TwitterImage:       meta("meta[name='twitter:image']"),
		ThemeColor:         meta("meta[name='theme-color']"),
		Robots:             meta("meta[name='robots']"),
		Googlebot:          meta("meta[name='googlebot']"),
		Generator:          meta("meta[name='generator']"),
		Viewport:           meta("meta[name='viewport']"),
		Author:             meta("meta[name='author']"),
		Publisher:          link("link[rel='publisher']"),
		Favicon:            link("link[rel='icon']"),
	}, nil
}
65 changes: 65 additions & 0 deletions checks/social_tags_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package checks

import (
"context"
"net/http"
"testing"

"github.com/stretchr/testify/assert"
"github.com/xray-web/web-check-api/testutils"
)

// TestSocialTagsEmpty checks SocialTagsData.Empty against a zero value and a
// value with a single populated field.
func TestSocialTagsEmpty(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name      string
		data      SocialTagsData
		wantEmpty bool
	}{
		{name: "Empty", data: SocialTagsData{}, wantEmpty: true},
		{name: "Not empty", data: SocialTagsData{Title: "Example Domain"}, wantEmpty: false},
	}

	for _, tc := range cases {
		tc := tc // per-iteration copy for the parallel subtest (required before Go 1.22)
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			if tc.wantEmpty {
				assert.True(t, tc.data.Empty())
			} else {
				assert.False(t, tc.data.Empty())
			}
		})
	}
}

func TestNewSocialTags(t *testing.T) {
t.Parallel()

t.Run("No social tags", func(t *testing.T) {
t.Parallel()

client := testutils.MockClient(testutils.Response(http.StatusOK, []byte{}))
tags, err := NewSocialTags(client).GetSocialTags(context.TODO(), "http://example.com")
assert.NoError(t, err)
assert.True(t, tags.Empty())
})

t.Run("Social tags", func(t *testing.T) {
t.Parallel()

var html = []byte(`
<html>
<head>
<title>Example Domain</title>
<meta name="description" content="Example description">
<meta property="og:title" content="Example OG Title">
</head>
<body></body>
</html>
`)
client := testutils.MockClient(testutils.Response(http.StatusOK, html))
tags, err := NewSocialTags(client).GetSocialTags(context.TODO(), "http://example.com")
assert.NoError(t, err)
assert.False(t, tags.Empty())
assert.Equal(t, "Example description", tags.Description)
assert.Equal(t, "Example Domain", tags.Title)
assert.Equal(t, "Example OG Title", tags.OgTitle)
})
}
105 changes: 4 additions & 101 deletions handlers/social_tags.go
Original file line number Diff line number Diff line change
@@ -1,120 +1,23 @@
package handlers

import (
"errors"
"net/http"

"github.com/PuerkitoBio/goquery"
"github.com/xray-web/web-check-api/checks"
)

// SocialTags is the JSON response payload describing the metadata scraped
// from a page. Every field is a plain string; a tag that is not present on the
// page is left as the empty string.
type SocialTags struct {
	// General document metadata.
	Title        string `json:"title"`
	Description  string `json:"description"`
	Keywords     string `json:"keywords"`
	CanonicalUrl string `json:"canonicalUrl"`

	// Open Graph (meta property="og:*") values.
	OgTitle       string `json:"ogTitle"`
	OgType        string `json:"ogType"`
	OgImage       string `json:"ogImage"`
	OgUrl         string `json:"ogUrl"`
	OgDescription string `json:"ogDescription"`
	OgSiteName    string `json:"ogSiteName"`

	// Twitter card (meta name="twitter:*") values.
	TwitterCard        string `json:"twitterCard"`
	TwitterSite        string `json:"twitterSite"`
	TwitterCreator     string `json:"twitterCreator"`
	TwitterTitle       string `json:"twitterTitle"`
	TwitterDescription string `json:"twitterDescription"`
	TwitterImage       string `json:"twitterImage"`

	// Remaining meta and link values.
	ThemeColor string `json:"themeColor"`
	Robots     string `json:"robots"`
	Googlebot  string `json:"googlebot"`
	Generator  string `json:"generator"`
	Viewport   string `json:"viewport"`
	Author     string `json:"author"`
	Publisher  string `json:"publisher"`
	Favicon    string `json:"favicon"`
}

// isEmpty reports whether every field of tags is the empty string. Because
// all fields are strings, this is the same as comparing against the zero
// value of the struct.
func isEmpty(tags *SocialTags) bool {
	var zero SocialTags
	return *tags == zero
}

// HandleGetSocialTags returns an http.Handler that extracts the target URL
// from the request, collects that page's social/meta tags and writes them as
// a JSON response.
// NOTE(review): this listing is a rendered diff with removed and added lines
// interleaved; both signatures, the old inline scraping code and the new
// s.GetSocialTags call all appear together below.
func HandleGetSocialTags() http.Handler {
func HandleGetSocialTags(s *checks.SocialTags) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
rawURL, err := extractURL(r)
if err != nil {
// Any extractURL failure is reported as a missing URL parameter (HTTP 400).
JSONError(w, ErrMissingURLParameter, http.StatusBadRequest)
return
}

// Fetch HTML content from the URL
resp, err := http.Get(rawURL.String())
tags, err := s.GetSocialTags(r.Context(), rawURL.String())
if err != nil {
JSONError(w, err, http.StatusInternalServerError)
return
}
defer resp.Body.Close()

// Parse HTML document
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
// NOTE(review): this path returns without writing any error response.
return
}

// Extract social tags metadata
tags := &SocialTags{
Title: doc.Find("head title").Text(),
Description: doc.Find("meta[name='description']").AttrOr("content", ""),
Keywords: doc.Find("meta[name='keywords']").AttrOr("content", ""),
CanonicalUrl: doc.Find("link[rel='canonical']").AttrOr("href", ""),
OgTitle: doc.Find("meta[property='og:title']").AttrOr("content", ""),
OgType: doc.Find("meta[property='og:type']").AttrOr("content", ""),
OgImage: doc.Find("meta[property='og:image']").AttrOr("content", ""),
OgUrl: doc.Find("meta[property='og:url']").AttrOr("content", ""),
OgDescription: doc.Find("meta[property='og:description']").AttrOr("content", ""),
OgSiteName: doc.Find("meta[property='og:site_name']").AttrOr("content", ""),
TwitterCard: doc.Find("meta[name='twitter:card']").AttrOr("content", ""),
TwitterSite: doc.Find("meta[name='twitter:site']").AttrOr("content", ""),
TwitterCreator: doc.Find("meta[name='twitter:creator']").AttrOr("content", ""),
TwitterTitle: doc.Find("meta[name='twitter:title']").AttrOr("content", ""),
TwitterDescription: doc.Find("meta[name='twitter:description']").AttrOr("content", ""),
TwitterImage: doc.Find("meta[name='twitter:image']").AttrOr("content", ""),
ThemeColor: doc.Find("meta[name='theme-color']").AttrOr("content", ""),
Robots: doc.Find("meta[name='robots']").AttrOr("content", ""),
Googlebot: doc.Find("meta[name='googlebot']").AttrOr("content", ""),
Generator: doc.Find("meta[name='generator']").AttrOr("content", ""),
Viewport: doc.Find("meta[name='viewport']").AttrOr("content", ""),
Author: doc.Find("meta[name='author']").AttrOr("content", ""),
Publisher: doc.Find("link[rel='publisher']").AttrOr("href", ""),
Favicon: doc.Find("link[rel='icon']").AttrOr("href", ""),
}

// A page with no recognised tags at all is rejected with HTTP 400 rather
// than answered with an all-empty payload.
if isEmpty(tags) {
JSONError(w, errors.New("no metadata found"), http.StatusBadRequest)
return
}

JSON(w, tags, http.StatusOK)
})
}
Loading

0 comments on commit b00c48a

Please sign in to comment.