
Behind the scenes, I leverage a few different scripts that build then publish the site.
The build process (see the build script details below) involves the following steps:
- build:blogroll - fetch the latest entries for each blog in the blogroll
- build:sort_frontmatter - I wanted the tags to always be alphabetical
- build:hugo - this script leverages the hugo command to build the public pages
- build:redirects - generate the redirects to preserve URLs
- build:amplify - create the AMP-compliant HTML page
- build:beautify - I like nice tabbed HTML source
- build:minify - compress the underlying CSS
- build:duplicate_feed - I have an old feed URL and a new feed URL I need to maintain; this creates that duplicate
Once built, the publish script (see the publish script details below) pushes those changes to GitHub (which is my static site host).
The Code
Preamble
The preamble: constants that are used repeatedly throughout the scripts.
require 'toml-rb'
# Absolute path of the directory that contains this file.
PROJECT_PATH = File.dirname(File.expand_path(__FILE__))
# The site configuration, parsed from config.toml in the project directory.
SITE_CONFIG = TomlRB.load_file(File.join(PROJECT_PATH, 'config.toml'))
# The ./public directory inside the project.
PUBLIC_PATH = File.join(PROJECT_PATH, 'public')
# A directory named takeonrules.github.io-source that sits next to the project
# directory. The constant name's spelling is kept as-is.
ORGINAL_PROJECT_PATH = File.expand_path('../takeonrules.github.io-source', PROJECT_PATH)
Structure and Method for Page
Function for loading a structural representation of a page/post. This is a common task for the website, so I’ve extracted a method and a structure to encode that behavior.
# A page/post held as three parts: the filename it came from, its parsed
# frontmatter (expected to behave like a Hash) and its body text.
FileWithFrontmatterAndContent = Struct.new(:filename, :frontmatter, :body) do
  # The frontmatter's "tags" entry, or an empty Array when it has none.
  def tags
    frontmatter.fetch("tags", [])
  end

  # A new Hash holding the frontmatter with keys in ascending order; values
  # that are Arrays are sorted as well, all other values are kept as-is.
  def sorted_frontmatter
    frontmatter.sort_by { |key, _value| key }.each_with_object({}) do |(key, value), sorted|
      sorted[key] = value.is_a?(Array) ? value.sort : value
    end
  end

  # The text to write back to disk: the YAML dump of the sorted frontmatter,
  # a '---' line, then the body.
  def content
    yaml = Psych.dump(sorted_frontmatter).strip
    [yaml, '---', body].join("\n")
  end
end
# Loads the given filename, separates its frontmatter from its content, and
# returns a FileWithFrontmatterAndContent object.
#
# The first line consisting only of '---' opens the frontmatter and the next
# such line closes it; neither marker is kept. Lines between the markers are
# parsed with Psych.load. Every other line is body content, including any
# later '---' line (for example a Markdown horizontal rule), which previously
# was dropped.
#
# @param filename [String] path of the file to read
# @return [FileWithFrontmatterAndContent] filename, parsed frontmatter, body
def load_file_with_frontmatter_and_content_from(filename)
  frontmatter_text = ''.dup
  content = ''.dup
  # :before -> no marker seen yet, :inside -> between the markers,
  # :after -> the frontmatter has been closed.
  state = :before
  File.readlines(filename).each do |line|
    marker = line.strip == '---'
    if marker && state == :before
      state = :inside
    elsif marker && state == :inside
      state = :after
    elsif state == :inside
      frontmatter_text << line
    else
      # Lines before the opening marker, and everything after the closing
      # marker (even another '---'), belong to the body.
      content << line
    end
  end
  FileWithFrontmatterAndContent.new(filename, Psych.load(frontmatter_text), content)
end
Template and Method for Generating Redirects
Below is the HTML template and the code used to generate those redirect files.
# HTML skeleton for a redirect page. Each %{to} placeholder is filled with the
# destination URL through String#% with a Hash argument.
REDIRECT_TEMPLATE = <<HTML.strip
<!DOCTYPE html>
<html>
<head>
<title>%{to}</title>
<link rel="canonical" href="%{to}"/>
<meta name="robots" content="noindex">
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta http-equiv="refresh" content="0; url=%{to}"/>
</head>
<body>
<h1>Redirecting to %{to}</h1>
<a href="%{to}">Click here if you are not redirected.</a>
</body>
</html>
HTML
# Writes an HTML redirect page based on the given parameters.
#
# The page is written to <PUBLIC_PATH>/<from_slug>/index.html and points at
# the site's baseURL joined with :to_slug (with a trailing slash).
#
# @param from_slug [String] the path, relative to PUBLIC_PATH, to redirect from
# @param to_slug [String] the path, relative to the site's baseURL, to redirect to
# @param skip_existing_file [Boolean] when truthy, an index.html that already
#   exists at the source location is left alone
def create_redirect_page_for(from_slug:, to_slug:, skip_existing_file: true)
  target_directory = File.join(PUBLIC_PATH, from_slug)
  target_file = File.join(target_directory, 'index.html')

  if skip_existing_file && File.exist?(target_file)
    $stdout.puts "\tSkipping #{from_slug}; Redirect already exists"
    return
  end

  destination_url = File.join(SITE_CONFIG.fetch("baseURL"), to_slug, '/')
  html = REDIRECT_TEMPLATE % { to: destination_url }
  FileUtils.mkdir_p(target_directory)
  $stdout.puts %(\tCreating redirect at "#{from_slug}")
  File.open(target_file, 'w+') { |file| file.puts(html) }
end
The Build Process
Below are the tasks that comprise the build process.
# Umbrella task: its prerequisites are the individual build:* tasks, listed in
# the order they are meant to run.
desc "Build the hugo site for production, target to ./public"
task build: %w[
  build:blogroll
  build:sort_frontmatter
  build:hugo
  build:redirects
  build:amplify
  build:beautify
  build:minify
  build:duplicate_feed
]
And the individual details of each of those build tasks.
namespace :build do
# Resets the git checkout in PUBLIC_PATH to a clean, up-to-date gh-pages
# branch, then deletes every entry the '*' glob finds there (skipping any path
# ending in ".git") so the next build starts from an empty directory.
desc 'Remove all, except .git, files in ./public'
task :cleanDestinationDir do
  require 'fileutils'
  reset_command = "cd #{PUBLIC_PATH} && git checkout gh-pages && git reset --hard && git clean -df && git pull --rebase"
  unless system(reset_command)
    $stderr.puts "Error cleaning destination directory, see above messages"
    exit!(1)
  end
  Dir.glob(File.join(PUBLIC_PATH, '*')).each do |path|
    FileUtils.rm_rf(path) unless path =~ /\.git$/
  end
end
# Runs the hugo command from the project directory; exits the process with
# status 2 when the command does not succeed.
desc "Use hugo to build the ./public dir"
task hugo: ["build:cleanDestinationDir"] do
  $stdout.puts "Buidling hugo site to ./public"
  built = system("cd #{PROJECT_PATH}; hugo")
  unless built
    $stderr.puts "\tError building website"
    exit!(2)
  end
end
# Reads data/redirects.yml and writes one redirect page per entry. Each entry
# must provide the keys 'from', 'to' and 'skip_existing_file'.
desc 'Using the ./data/redirects.yml, build redirects in ./public'
task redirects: ["build:hugo"] do
  $stdout.puts "Creating Redirects…"
  require 'fileutils'
  require 'psych'
  redirects = Psych.load_file(File.join(PROJECT_PATH, 'data/redirects.yml'))
  redirects.each do |entry|
    # The source slug is always wrapped in leading and trailing slashes.
    source_slug = File.join('/', entry.fetch('from'), '/')
    destination_slug = entry.fetch('to')
    skip_existing = entry.fetch('skip_existing_file')
    create_redirect_page_for(from_slug: source_slug, to_slug: destination_slug, skip_existing_file: skip_existing)
  end
  $stdout.puts "\tDone Creating Redirects"
end
# Builds an AMP variant of the generated HTML pages.
#
# Steps, in order:
# 1. Build an AMP-safe stylesheet from the first css/tufte.*.css file found in
#    PUBLIC_PATH and minify it through the cssminifier.com web service.
# 2. For every HTML file under PUBLIC_PATH (except those under assets, css,
#    fonts, amp and tag/), compute the matching path under amp/.
# 3. Rewrite the page: mark the html tag as amp, replace details/summary, img
#    and iframe tags, drop scripts other than ld+json, drop link[media]
#    elements and the license block, then inject the AMP styles/scripts and
#    the analytics snippet.
desc 'Working with the existing files, build AMP friendly versions in ./public'
task amplify: ["build:hugo"] do
require 'rest_client'
require 'nokogiri'
$stdout.puts "Amplifying the content…"
# Need to clean up the stylesheet as it includes elements that are not AMP compatible
stylesheet_content = ''
skipping = false
# Because there are style declarations that should not be included as they violate
# AMP requirements
tufte_filename = Dir.glob(File.join(PUBLIC_PATH, "css/tufte.*.css")).first
File.readlines(tufte_filename).each do |line|
next if line =~ /@charset/
# A line whose first non-space character is "/" is treated as a comment line
# and is never copied; two specific comments toggle the skipping flag.
if line =~ /\A *\// # Remove comments
if line.strip == "/* BEGIN SKIP-AMP */"
skipping = true
elsif line.strip == "/* END SKIP-AMP */"
skipping = false
end
next
else
# Lines between the BEGIN/END SKIP-AMP markers are left out
next if skipping
stylesheet_content += line.strip + "\n"
end
end
# Minify the filtered stylesheet via the remote service (needs network access)
stylesheet_content = RestClient.post "https://cssminifier.com/raw", input: stylesheet_content
# These scripts need to be injected into every page
base_amp_scripts = []
base_amp_scripts << %(<style amp-custom>#{stylesheet_content}</style>)
base_amp_scripts << %(<style amp-boilerplate>body{-webkit-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-moz-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-ms-animation:-amp-start 8s steps(1,end) 0s 1 normal both;animation:-amp-start 8s steps(1,end) 0s 1 normal both}@-webkit-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-moz-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-ms-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-o-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}</style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>)
base_amp_scripts << %(<script async src="https://cdn.ampproject.org/v0.js"></script>)
base_amp_scripts << %(<script async custom-element="amp-form" src="https://cdn.ampproject.org/v0/amp-form-0.1.js"></script>)
base_amp_scripts << %(<script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>)
Dir.glob(File.join(PUBLIC_PATH, "**/*.html")).each do |filename|
# Skip directories that should not get an AMP copy, including amp/ itself
next if filename.start_with?(File.join(PUBLIC_PATH, 'assets'))
next if filename.start_with?(File.join(PUBLIC_PATH, 'css'))
next if filename.start_with?(File.join(PUBLIC_PATH, 'fonts'))
next if filename.start_with?(File.join(PUBLIC_PATH, 'amp'))
# Skipping tag as those are now in tags/
next if filename.start_with?(File.join(PUBLIC_PATH, 'tag/'))
# Per-page copy, because the amp-iframe script is appended only when needed
amp_scripts = base_amp_scripts.clone
# Checking blog posts: the first four-digit path segment gets "amp/" put in front of it
if filename =~ /\/\d{4}\//
amp_filename = filename.sub(/\/(\d{4})\//, '/amp/\1/')
else
# Checking pages: mirror the path underneath PUBLIC_PATH/amp
amp_filename = filename.sub(PUBLIC_PATH, File.join(PUBLIC_PATH, 'amp'))
end
FileUtils.mkdir_p(File.dirname(amp_filename))
content = File.read(filename)
# Ensure that HTML is marked as AMP ready
content.sub!(/^ *\<html /, '<html amp ')
# Remove the first appcache manifest attribute and the first
# hide-when-javascript-disabled occurrence
content.sub!(%(manifest="/cache.appcache"), '')
content.sub!("hide-when-javascript-disabled", '')
# Because details-tag is not valid in amp
content.gsub!("<details open", "<span")
# Likewise because details-tag and summary-tag are not valid in amp
content.gsub!(/\<(\/?)(summary|details)/, '<\1span')
doc = Nokogiri::HTML(content)
# Convert img tag into an AMP compliant amp-img tag
doc.css('img').each do |node|
amp_img = doc.create_element('amp-img')
src = node.get_attribute('src')
# Dimensions are read from the data-width/data-height attributes of the img tag
width = node.get_attribute('data-width')
height = node.get_attribute('data-height')
amp_img.set_attribute('src', src)
amp_img.set_attribute('width', width)
amp_img.set_attribute('height', height)
amp_img.set_attribute('layout', 'responsive')
node.replace amp_img
end
added_iframe_script = false
# Convert iframe tag into an AMP compliant amp-iframe tag
doc.css('iframe').each do |node|
amp_iframe = doc.create_element('amp-iframe')
# Copy every attribute across except marginheight and marginwidth
node.attributes.each do |key, value|
next if key == 'marginheight'
next if key == 'marginwidth'
amp_iframe.set_attribute(key, value.to_s)
end
amp_iframe.set_attribute('sandbox', "allow-scripts allow-same-origin")
amp_iframe.set_attribute('layout', "responsive")
# A copy of the original iframe is kept inside a noscript element that is
# appended to the iframe's parent; the iframe itself becomes the amp-iframe
noscript = doc.create_element('noscript')
noscript << node.clone
node.parent << noscript
node.replace amp_iframe
# The amp-iframe script is added at most once per page
next if added_iframe_script
added_iframe_script = true
amp_scripts << %(<script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>)
end
doc.css('script').each do |node|
# LD+JSON is valid for amp; All others are not
next if node['type'] == 'application/ld+json'
node.remove
end
# Remove every link element that carries a media attribute
doc.css('link[media]').each do |node|
node.remove
end
# Because the license contains several problematic amp attributes,
# I'm removing that license
doc.css('.credits .license').each do |node|
node.remove
end
content = doc.to_html
# Inject the AMP styles and scripts just before the first closing head tag
content.sub!("</head>", amp_scripts.join("\n") + "\n</head>")
# The analytics snippet uses the googleAnalytics value from the site config
amp_analytics = %(<amp-analytics type="gtag" data-credentials="include">\n<script type="application/json">\n{ "vars" : { "gtag_id": "#{SITE_CONFIG.fetch('googleAnalytics')}", "config" : { "#{SITE_CONFIG.fetch('googleAnalytics')}": { "groups": "default" } } } }\n</script>\n</amp-analytics>)
# ...and is placed immediately after the opening body tag
content.sub!(/\<body([^\>]*)\>/, '<body\1>' + "\n#{amp_analytics}")
File.open(amp_filename, 'w+') { |f| f.puts content }
end
$stdout.puts "\tDone Amplifying"
end
# Re-indents every HTML file under ./public with tabs. Before beautifying, the
# body of each application/ld+json script is round-tripped through JSON; a
# parse failure is reported with the offending filename and re-raised.
desc 'Beautify the HTML of the sites'
task beautify: ["build:hugo", "build:redirects", "build:amplify"] do
  $stdout.puts "Beautfying the HTML…"
  # Redirects and resulting amp pages should be beautiful too
  require 'htmlbeautifier'
  require 'nokogiri'
  require 'json'
  html_files = Dir.glob(File.join(PROJECT_PATH, 'public', "**/*.html"))
  html_files.each do |filename|
    doc = Nokogiri::HTML(File.read(filename))
    doc.css('script').each do |node|
      next unless node['type'] == 'application/ld+json'
      begin
        node.content = JSON.dump(JSON.load(node.content))
      rescue JSON::ParserError => e
        $stderr.puts "JSON parse error encountered in #{filename}"
        raise e
      end
    end
    beautiful = HtmlBeautifier.beautify(doc.to_html, indent: "\t")
    File.open(filename, 'w+') { |f| f.puts beautiful }
  end
  $stdout.puts "\tDone Beautifying"
end
# Copies ./public/feed.xml to ./public/feed/index.xml.
task duplicate_feed: ["build:hugo"] do
  # Because some sources have https://takeonrules.com/feed/ I need to resolve that behavior
  require 'fileutils'
  $stdout.puts "Duplicating and building externally published feeds"
  FileUtils.mkdir_p(File.join(PUBLIC_PATH, "feed"))
  source_feed = File.join(PUBLIC_PATH, 'feed.xml')
  duplicate = File.join(PUBLIC_PATH, 'feed/index.xml')
  FileUtils.cp(source_feed, duplicate)
end
# Replaces each CSS file under ./public/css with the minified text returned by
# the cssminifier.com web service.
task minify: ["build:hugo", "build:amplify"] do
  # Minify-ing CSS will remove some comments that are build switches for the amplify process
  # So amplify first
  require 'rest_client'
  $stdout.puts "Minifying CSS"
  # TODO as part of the amplify, I'd like to send along a minified CSS; For now, that just won't happen
  stylesheets = Dir.glob(File.join(PROJECT_PATH, 'public/css/**/*.css'))
  stylesheets.each do |filename|
    minified = RestClient.post("https://cssminifier.com/raw", input: File.read(filename))
    File.open(filename, "w+") { |f| f.puts minified }
  end
end
namespace :blogroll do
# Downloads every feed listed in data/blogroll.yml, keeps the first entry of
# each, and writes them (newest first) to data/blogroll_entries.yml.
# Does nothing when the NO_BLOGROLL environment variable is set.
desc "Fetch blogroll entries"
task :fetch do
  if ENV["NO_BLOGROLL"]
    $stdout.puts "Skipping blog roll"
    next
  end
  $stdout.puts "Fetching blog roll entries"
  require 'rest_client'
  require 'nokogiri'
  require 'time'
  require 'psych'
  require 'feedjira'
  require 'uri'

  # The first entry of one feed, together with details of the site it came from.
  class BlogRollEntry
    include Comparable

    attr_reader :site_url, :site_title, :item_pubDate, :item_title, :item_url, :author

    # @param xml [String] the raw feed document
    def initialize(xml:)
      feed = Feedjira::Feed.parse(xml)
      item = feed.entries.first
      uri = URI.parse(feed.url)
      @site_url = "#{uri.scheme}://#{uri.host}"
      @site_title = feed.title
      @item_pubDate = item.published.strftime('%Y-%m-%d %H:%M:%S %z')
      @item_url = item.url
      # Without a title, derive one from the last URL segment: strip the
      # extension and turn runs of non-word characters into spaces.
      @item_title = item.title ? item.title : item.url.split("/").last.sub(/\.\w+$/, '').gsub(/\W+/, ' ')
    end

    # Orders by the formatted publication date, then by site title.
    def <=>(other)
      by_date = item_pubDate <=> other.item_pubDate
      by_date == 0 ? site_title <=> other.site_title : by_date
    end

    def to_hash
      {
        "site_url" => site_url,
        "site_title" => site_title,
        "item_pubDate" => item_pubDate,
        "item_title" => item_title,
        "item_url" => item_url
      }
    end
  end

  feed_urls = Psych.load_file(File.join(PROJECT_PATH, 'data/blogroll.yml'))
  entries = feed_urls.each_with_object([]) do |feed_url, collected|
    begin
      $stdout.puts "\tFetching #{feed_url}"
      response = RestClient.get(feed_url)
      collected << BlogRollEntry.new(xml: response.body)
    rescue RestClient::Exceptions::OpenTimeout
      # A feed that times out while connecting is skipped
      $stdout.puts "Timeout for #{feed_url}, moving on"
    end
  end

  newest_first = entries.sort.reverse
  output = newest_first.map(&:to_hash)
  File.open(File.join(PROJECT_PATH, 'data/blogroll_entries.yml'), 'w+') { |f| f.puts Psych.dump(output) }
end
# Stages and commits data/blogroll_entries.yml in the project repository.
# Does nothing when the NO_BLOGROLL environment variable is set.
desc "Commit blogroll entries"
task :commit do
  if ENV["NO_BLOGROLL"]
    $stdout.puts "Skipping blog roll"
    next
  end
  message = "Updating blogroll entries\n\n```shell\n$ bundle exec rake publish:blogroll\n```"
  $stdout.puts "Committing ./data/blogroll_entries.yml"
  entries_file = File.join(PROJECT_PATH, 'data/blogroll_entries.yml')
  # The argument-list form of system does not go through a shell. The message
  # contains backticks, which a shell would treat as command substitution
  # inside a double-quoted string and so mangle the commit message.
  system('git', 'add', entries_file, chdir: PROJECT_PATH)
  # As before, the commit is attempted regardless of the result of `git add`.
  system('git', 'commit', '-m', message, chdir: PROJECT_PATH)
end
end
# Rewrites every Markdown file under ./content with its frontmatter keys (and
# any Array values) sorted.
desc "Sort frontmatter alphabetically"
task :sort_frontmatter do
  require 'psych'
  $stdout.puts "Sorting front matter"
  Dir.glob(File.join(PROJECT_PATH, 'content/**', '*.md')).each do |filename|
    page = load_file_with_frontmatter_and_content_from(filename)
    # Render BEFORE opening the file: mode 'w+' truncates on open, so an error
    # raised while rendering would otherwise leave the source file empty.
    rendered = page.content
    File.open(filename, 'w+') { |f| f.puts rendered }
  end
end
# Rake attaches a desc to the NEXT task defined, so the description has to
# come before the task for it to show up in the task listing.
desc "Fetch blog roll entries"
task blogroll: ["build:blogroll:fetch", "build:blogroll:commit"]
end
The Publish Process
The task that is used to push the pages up to the server. Note that prior to running publish, I run the build process.
# Commits and pushes the built ./public pages, then records the new ./public
# revision in the project repository. The build task runs first.
desc "Publish changes to https://takeonrules.com"
task publish: :build do
  # SHA of the latest commit in the source repository, quoted in the message
  project_sha = `cd #{PROJECT_PATH} && git log --pretty=format:'%H' -1`.strip
  message = "Site updated at #{Time.now.utc}\n\nUsing SHA1 #{project_sha}\nfrom source repository\n\n```shell\n$ bundle exec rake publish\n```"
  $stdout.puts "Committing ./public pages"
  # The message is single-quoted here, so the shell leaves its backticks alone
  system("cd #{PUBLIC_PATH} && git checkout gh-pages && git add . && git commit -am '#{message}' && git checkout master && git rebase gh-pages")
  $stdout.puts "Pushing ./public pages"
  system("cd #{PUBLIC_PATH} && git push origin gh-pages && git push origin master")
  $stdout.puts "Updating project's pointer for ./public submodule"
  # Argument-list form: no shell is involved, so the backticks in the message
  # are not run as command substitution (they were, inside double quotes).
  # Each step runs only when the previous one succeeded, as with `&&` before.
  system('git', 'add', 'public', chdir: PROJECT_PATH) &&
    system('git', 'commit', '-m', message, chdir: PROJECT_PATH) &&
    system('git', 'push', 'origin', 'master', chdir: PROJECT_PATH)
end