Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ Sitemapper.configure do |c|

c.sitemap_host = "https://sitemaps.aws.whatever.com" # default nil

c.index_file_name = "my_index_file_name" # default "sitemap_index"

c.sitemap_file_name = "my_file_name" # default "sitemap"

# The max number of <url> elements to add to each sitemap
c.max_urls = 20 # default 500

Expand Down Expand Up @@ -80,9 +84,11 @@ Same goes for in you want to add an image. Use `Sitemapper::ImageMap` and pass `

## Saving your XML

Sitemapper gives you the raw XML in strings. This gives you the option to save that data however you wish. Maybe you're crazy and want to store it in your DB? Maybe you're running on Heroku and can't just write locally, so you need to ship it off to AWS. What ever the case, you have that freedom.
`Sitemapper.build` gives you the raw XML in strings. This gives you the option to save that data however you wish. Maybe you're crazy and want to store it in your DB? Maybe you're running on Heroku and can't just write locally, so you need to ship it off to AWS. Whatever the case, you have that freedom.

There are a few built-in options: `LocalStorage` and `AwsStorage`. These are configured through `config.storage`.

There's a few options you have built in. `LocalStorage`, and `AwsStorage`. These are config options through `config.storage`
You can also use `Sitemapper.stream` to save the XML data one file at a time. This is useful for very large sitemaps which wouldn't fit in memory. This option won't return XML, and will instead use whatever storage (e.g. `LocalStorage` or `AwsStorage`) you have configured.

### LocalStorage

Expand Down Expand Up @@ -137,6 +143,19 @@ Sitemapper.store(sitemaps, "my-prod-bucket/sitemaps")

Lastly, so the search engines know where your sitemaps are located (unless you aliased `/sitemap_index.xml`), you'll want to update your [robots.txt](http://www.robotstxt.org/) with `Sitemap: https://my-sitemap-host.com`

### Storing files one at a time

Use `Sitemapper.stream` in place of `Sitemapper.build` to save files one at a time. For example:

```crystal
Sitemapper.stream do |builder|
builder.add("/about", changefreq: "yearly", priority: 0.1)
builder.add("/profiles/somedude", changefreq: "always", priority: 0.9)
end
```

`Sitemapper.stream` accepts optional arguments for `host`, `max_urls`, `use_index`, `storage`, and `storage_path`. All of them default to the options saved inside `Sitemapper.configure`.

## Notifying Search Engines

Once you have your sitemaps updated, it's usually a good idea to let the search engines know. Generally, they will crawl your site regularly anyway, but this at least gets things moving a little quicker. To do this, you can use the `ping_search_engines` method.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
require "./spec_helper"

describe Sitemapper::Builder do
describe Sitemapper::InMemoryBuilder do
describe "#add" do
it "adds /tacos to the paths" do
builder = Sitemapper::Builder.new(host: "", max_urls: 20, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "", max_urls: 20, use_index: true)
builder.add("/tacos")
builder.paginator.paths.size.should eq 1
end

it "adds /burritors with a changefreq of weekly" do
builder = Sitemapper::Builder.new(host: "", max_urls: 20, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "", max_urls: 20, use_index: true)
builder.add("/burritos", changefreq: "weekly")
builder.paginator.paths.size.should eq 1
end
end

describe "#generate" do
it "returns an array with 1 hash" do
builder = Sitemapper::Builder.new(host: "", max_urls: 20, use_index: false)
builder = Sitemapper::InMemoryBuilder.new(host: "", max_urls: 20, use_index: false)
builder.add("/tacos")
xml = builder.generate
xml.size.should eq 1
Expand All @@ -26,7 +26,7 @@ describe Sitemapper::Builder do
end

it "returns an array with 4 hashes" do
builder = Sitemapper::Builder.new(host: "", max_urls: 1, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "", max_urls: 1, use_index: true)
builder.add("/tacos/1")
builder.add("/tacos/2")
builder.add("/tacos/3")
Expand All @@ -36,7 +36,7 @@ describe Sitemapper::Builder do
end

it "generates some valid sitemap xml" do
builder = Sitemapper::Builder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder.add("/tacos")
xml = builder.generate.as(Array).first["data"]
xml.should contain <<-XML
Expand All @@ -53,7 +53,7 @@ describe Sitemapper::Builder do
end

it "generates the xml with a video tag data" do
builder = Sitemapper::Builder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "http://food.com", max_urls: 100, use_index: true)
video = Sitemapper::VideoMap.new(thumbnail_loc: "http://video.org/sample.mpg", title: "Video", description: "This is a video", tags: ["red", "blue"])
builder.add("/tacos", video: video)
xml = builder.generate.as(Array).first["data"]
Expand All @@ -75,7 +75,7 @@ describe Sitemapper::Builder do
end

it "generates the xml with image tag data" do
builder = Sitemapper::Builder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "http://food.com", max_urls: 100, use_index: true)
image = Sitemapper::ImageMap.new(loc: "http://image.org/sample.jpg", caption: "This is an image")
builder.add("/tacos", image: image)
xml = builder.generate.as(Array).first["data"]
Expand All @@ -93,7 +93,7 @@ describe Sitemapper::Builder do
end

it "generates the sitemap_index with the specified host" do
builder = Sitemapper::Builder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder.add("/burgers")
xml = builder.generate.as(Array).find { |h| h["name"] == "sitemap_index.xml" }.as(Hash(String, String))
xml["data"].should contain <<-XML
Expand All @@ -103,7 +103,7 @@ describe Sitemapper::Builder do

it "generates the sitemap_index with a custom sitemap host" do
Sitemapper.configure { |c| c.sitemap_host = "https://sitemaps.myapp.com" }
builder = Sitemapper::Builder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder = Sitemapper::InMemoryBuilder.new(host: "http://food.com", max_urls: 100, use_index: true)
builder.add("/burgers")
xml = builder.generate.as(Array).find { |h| h["name"] == "sitemap_index.xml" }.as(Hash(String, String))
xml["data"].should contain <<-XML
Expand Down
18 changes: 10 additions & 8 deletions src/sitemapper.cr
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ require "./sitemapper/video_map"
require "./sitemapper/image_map"
require "./sitemapper/sitemap_options"
require "./sitemapper/paginator"
require "./sitemapper/builder"
require "./sitemapper/builder/*"
require "./sitemapper/storage"
require "./sitemapper/streamer"
require "./sitemapper/storage/*"
require "./sitemapper/ping_bot"

Expand All @@ -19,6 +18,8 @@ module Sitemapper
setting use_index : Bool = false
setting host : String, example: "https://mysite.com"
setting sitemap_host : String? = nil
setting index_file_name : String = "sitemap_index"
setting sitemap_file_name : String = "sitemap"
setting max_urls : Int32 = 500
setting storage : Sitemapper::Storage.class = Sitemapper::LocalStorage
setting compress : Bool = true
Expand All @@ -30,7 +31,7 @@ module Sitemapper
Sitemapper.settings
end

# Build your sitemaps. The block arg is an instance of `Sitemapper::Builder`.
# Build your sitemaps. The block arg is an instance of `Sitemapper::InMemoryBuilder`.
# Args default to the configuration, but can be overridden.
# ```
# Sitemapper.build(max_urls: 20) do |builder|
Expand All @@ -43,12 +44,13 @@ module Sitemapper
use_index : Bool = config.use_index,
&
) : Array(Hash(String, String))
builder = Sitemapper::Builder.new(host, max_urls, use_index)
builder = Sitemapper::InMemoryBuilder.new(host, max_urls, use_index)
yield builder
builder.generate
end

# Build your sitemaps, streaming each file. The block arg is an instance of `Sitemapper::Streamer`.
# Build your sitemaps, saving each file once it reaches `max_urls`.
# The block arg is an instance of `Sitemapper::StreamBuilder`.
# Args default to the configuration, but can be overridden.
# ```
# Sitemapper.stream(storage_path: "tmp/sitemaps") do |builder|
Expand All @@ -62,10 +64,10 @@ module Sitemapper
storage : Sitemapper::Storage.class = config.storage,
storage_path : String = config.storage_path,
&
) : Array(Hash(String, String))
builder = Sitemapper::Streamer.new(host, max_urls, use_index, storage, storage_path)
) : Void
builder = Sitemapper::StreamBuilder.new(host, max_urls, use_index, storage, storage_path)
yield builder
builder.generate
builder.finish
end

# Store your sitemap xml files.
Expand Down
29 changes: 22 additions & 7 deletions src/sitemapper/builder.cr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Sitemapper
class Builder
abstract class Builder
XMLNS_SCHEMA = "http://www.sitemaps.org/schemas/sitemap/0.9"
XMLNS_VIDEO_SCHEMA = "http://www.google.com/schemas/sitemap-video/1.1"
XMLNS_IMAGE_SCHEMA = "http://www.google.com/schemas/sitemap-image/1.1"
Expand All @@ -21,6 +21,11 @@ module Sitemapper
self
end

# Registers `path` as an extra entry for the sitemap index by delegating to
# the paginator's `index_add`. Returns `self` so calls can be chained.
def index_add(path) : self
paginator.index_add(path)
self
end

def generate : Array(Hash(String, String))
paginator.total_pages.times do |page|
filename = filename_for_page(page)
Expand Down Expand Up @@ -59,7 +64,7 @@ module Sitemapper
end

private def build_xml_for_page(items)
XML.build(indent: " ", version: "1.0", encoding: "UTF-8") do |xml|
XML.build(indent: " ") do |xml|
xml.element("urlset", xmlns: XMLNS_SCHEMA, "xmlns:video": XMLNS_VIDEO_SCHEMA, "xmlns:image": XMLNS_IMAGE_SCHEMA, "xmlns:xsi": XMLNS_XSI, "xsi:schemaLocation": XSI_SCHEMA_LOCATION) do
items.each do |info|
build_xml_from_info(xml, info)
Expand All @@ -86,12 +91,22 @@ module Sitemapper
end
end

private def filename_for_page(page)
if paginator.total_pages == 1
"sitemap.xml"
else
"sitemap#{page + 1}.xml"
# Builds the sitemap index document that points at each file in `filenames`.
#
# Every entry's `<loc>` is "<base>/<filename><suffix>", where the base is
# `Sitemapper.config.sitemap_host` when set (otherwise `@host`) and the
# suffix is ".gz" when `Sitemapper.config.compress` is enabled. `<lastmod>`
# is the current UTC time at the moment the entry is written.
#
# Returns a hash with the index file name under "name" (the configured
# `index_file_name` plus ".xml") and the XML string under "data".
private def generate_index(filenames : Array(String)) : Hash(String, String)
  # Configuration does not change while the document is built, so read it once.
  suffix = Sitemapper.config.compress ? ".gz" : ""
  base_url = Sitemapper.config.sitemap_host || @host

  index_xml = XML.build(indent: " ") do |xml|
    xml.element("sitemapindex", xmlns: XMLNS_SCHEMA, "xmlns:video": XMLNS_VIDEO_SCHEMA, "xmlns:image": XMLNS_IMAGE_SCHEMA, "xmlns:xsi": XMLNS_XSI, "xsi:schemaLocation": XSI_INDEX_SCHEMA_LOCATION) do
      filenames.each do |filename|
        xml.element("sitemap") do
          xml.element("loc") { xml.text "#{base_url}/#{filename}#{suffix}" }
          xml.element("lastmod") { xml.text Time.utc.to_s("%FT%X%:z") }
        end
      end
    end
  end

  {"name" => "#{Sitemapper.config.index_file_name}.xml", "data" => index_xml}
end
end
end
59 changes: 59 additions & 0 deletions src/sitemapper/builder/in_memory_builder.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
require "../builder"

module Sitemapper
  # This class builds a list of sitemaps in memory, but doesn't save them. The
  # caller must eventually call `Sitemapper.store` to save the resulting list
  # of sitemaps.
  class InMemoryBuilder < Builder
    XMLNS_SCHEMA       = "http://www.sitemaps.org/schemas/sitemap/0.9"
    XMLNS_VIDEO_SCHEMA = "http://www.google.com/schemas/sitemap-video/1.1"
    XMLNS_IMAGE_SCHEMA = "http://www.google.com/schemas/sitemap-image/1.1"
    # See: https://sitemaps.org/protocol.html#validating
    XMLNS_XSI                 = "http://www.w3.org/2001/XMLSchema-instance"
    XSI_SCHEMA_LOCATION       = "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    XSI_INDEX_SCHEMA_LOCATION = "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"

    getter paginator : Paginator

    # `host` is used for the index entries when no `sitemap_host` is
    # configured, `max_urls` is the page size handed to the paginator, and
    # `use_index` controls whether `generate` appends a sitemap index.
    def initialize(@host : String, @max_urls : Int32, @use_index : Bool)
      @paginator = Paginator.new(limit: @max_urls)
      @sitemaps = [] of Hash(String, String)
    end

    # Queues `path` for inclusion in a sitemap. All keyword arguments are
    # forwarded to `SitemapOptions.new`. Returns `self` for chaining.
    def add(path, **kwargs) : self
      options = SitemapOptions.new(**kwargs)
      paginator.add(path, options)
      self
    end

    # Registers an extra file name to be listed in the sitemap index in
    # addition to the generated sitemap files. Returns `self` for chaining.
    def index_add(path) : self
      paginator.index_add(path)
      self
    end

    # Renders one `{"name" => ..., "data" => ...}` hash per page of queued
    # paths and, when `use_index` is set, a final hash for the sitemap index
    # (extra index entries first, then the generated sitemap file names).
    def generate : Array(Hash(String, String))
      # Start from a fresh list on every call. Appending to the list created
      # in `initialize` made a second `generate` return every sitemap twice
      # and list the previous index file inside the new index.
      @sitemaps = [] of Hash(String, String)

      paginator.total_pages.times do |page|
        filename = filename_for_page(page)
        # `page` is zero-based here while the paginator's pages are one-based.
        doc = build_xml_for_page(paginator.items(page + 1))

        @sitemaps << {"name" => filename, "data" => doc}
      end

      if @use_index
        filenames = paginator.index_items
        # `+=` builds a new array, so the paginator's own list is not mutated.
        filenames += @sitemaps.map { |sitemap| sitemap["name"] }
        @sitemaps << generate_index(filenames)
      end

      @sitemaps
    end

    # File name for the zero-based `page`: the bare configured name when there
    # is only one page, otherwise the name followed by the one-based number.
    private def filename_for_page(page)
      if paginator.total_pages == 1
        Sitemapper.config.sitemap_file_name + ".xml"
      else
        Sitemapper.config.sitemap_file_name + "#{page + 1}.xml"
      end
    end
  end
end
64 changes: 64 additions & 0 deletions src/sitemapper/builder/stream_builder.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
require "../builder"

module Sitemapper
  # Builds sitemap files incrementally: as soon as `@max_urls` paths have been
  # queued, the page is rendered and handed to the configured storage, then a
  # new page is started. Callers don't need to call `Sitemapper.store`
  # afterwards; they call `finish` once all paths have been added.
  class StreamBuilder < Builder
    getter paginator : Paginator

    def initialize(@host : String, @max_urls : Int32, @use_index : Bool, @storage : Sitemapper::Storage.class, @storage_path : String)
      @current_page = 1
      @sitemaps = [] of Hash(String, String)
      @index_filenames = [] of String
      @filenames = [] of String
      @paginator = Paginator.new(limit: @max_urls)
    end

    # Queues `path` (keyword arguments go to `SitemapOptions.new`) and writes
    # the current page out once its size is a multiple of `@max_urls`.
    def add(path, **kwargs) : self
      paginator.add(path, SitemapOptions.new(**kwargs))
      flush if (paginator.paths.size % @max_urls).zero?
      self
    end

    # Records an extra file name to be listed first in the sitemap index.
    def index_add(path) : self
      @index_filenames << path
      self
    end

    # Renders the currently queued paths as one sitemap file, saves it through
    # the storage class, and starts a new, empty page.
    def flush
      name = filename_for_page(@current_page)
      xml = build_xml_for_page(paginator.items(1))
      # The name is recorded before saving, matching the order the index
      # will list the files in.
      @filenames << name

      @storage.new([{"name" => name, "data" => xml}]).save(@storage_path)

      @current_page += 1
      @paginator = Paginator.new(limit: @max_urls)
    end

    # Writes any partially filled page and, when `use_index` is set, the
    # sitemap index.
    def finish : Void
      flush unless paginator.paths.empty?
      save_index if @use_index
    end

    # Generates the index (extra entries first, then every flushed file) and
    # saves it through the storage class.
    private def save_index : Void
      index_file = generate_index(@index_filenames + @filenames)
      @storage.new([index_file]).save(@storage_path)
    end

    # Streamed files are always numbered, starting from page 1.
    private def filename_for_page(page)
      "#{Sitemapper.config.sitemap_file_name}#{page}.xml"
    end
  end
end
10 changes: 10 additions & 0 deletions src/sitemapper/paginator.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,30 @@ module Sitemapper
class Paginator
DEFAULT_LIMIT = 500
property paths : Array(Tuple(String, SitemapOptions))
property index_paths : Array(String)

# Creates a paginator with no queued paths and no index paths. `limit` is the
# page size used by `items` and defaults to `DEFAULT_LIMIT`.
def initialize(@limit : Int32 = DEFAULT_LIMIT)
@paths = [] of Tuple(String, SitemapOptions)
@index_paths = [] of String
end

# Appends `path` together with its `options` to the list of queued paths.
def add(path : String, options : SitemapOptions)
@paths << {path, options}
end

# Appends `path` to the separate list of index paths (see `index_items`).
def index_add(path : String)
@index_paths << path
end

# Returns the slice of queued paths belonging to `current_page`.
# Pages are one-based: page 1 starts at offset 0, page 2 at `@limit`, etc.
def items(current_page : Int32)
offset = (current_page * @limit) - @limit
# Start/count slice: at most `@limit` entries beginning at `offset`.
@paths[offset, @limit]
end

# Returns the index paths collected through `index_add` (the same array
# instance, not a copy).
def index_items
@index_paths
end

# This is calculated each time since you could
# get 1 the first time, then add to it and get 2 the second
def total_pages : Int32
Expand Down
Loading