commit - b77b250a6fbc0ecf95bf91dff9ec25f6a5c4260b
commit + 4912716080cc981b9e10048c4c0493c1c80487b3
blob - a785d7f2fd619836522e66ce6d8d616c7fc05ed4
blob + e769f117de025d598abc67114ea1168eb6bba24d
--- ssg.sh
+++ ssg.sh
while read -r t; do echo "$1" | grep "$t" | cut -f2; done
}
+plan_sitemap() {
+ # if src_hash is empty do nothing
+ if test -z "$1"; then return; fi
+ # if sitemap.xml found in src do nothing
+ if test -f "$SRC/$SSG_SITEMAP_XML"; then return; fi
+ echo "$SSG_SITEMAP_XML"
+ # if robots.txt found in src do nothing
+ if test -f "$SRC/$SSG_ROBOTS_TXT"; then return; fi
+ echo "$SSG_ROBOTS_TXT"
+}
+
# return file expected in dst directory
plan() {
while read -r k f; do
*) continue ;;
esac
done
+ plan_sitemap "$1"
}
# make dst directory and return src hash as is
# remove files and directories not present in plan from dst
clean_up_dst() {
- dst_plan=$(echo "$1" | cut_sort | prepend_kind | plan | sort)
+ dst_plan=$(echo "$1" | cut_sort | prepend_kind | plan "$1" | sort)
dst_files=$(echo "$2" | sort_relative "$DST" | cut_sort)
diff_lines "$dst_plan" "$dst_files" | select_right | files_in "$DST" |
rm_files
# return files to be updated
select_src_files() {
- if is_empty "$1"; then return; fi
+ dst_hash=$(hash_dst)
+ if is_empty "$1"; then
+ clean_up_dst "$src_hash" "$dst_hash"
+ return
+ fi
if ! is_dir "$DST"; then mkdir_select_all "$1" && return; fi
if ! is_ssg_src || ! is_ssg_dst; then rmdir_select_all "$1" && return; fi
- dst_hash=$(hash_dst)
if ! is_matching_ssg_dst "$dst_hash"; then rmdir_select_all "$1" && return; fi
src_hash_diff=$(diff_src "$1")
if is_empty "$src_hash_diff"; then return; fi
select_updated "$src_hash" "$src_hash_diff"
}
+# write sitemap to dst
+generate_sitemap() {
+ # if src_hash is empty do nothing
+ if test -z "$1"; then return; fi
+ # if sitemap.xml found in src do nothing
+ if test -f "$SRC/$SSG_SITEMAP_XML"; then return; fi
+ dst_pages=$(find "$DST" -type f -name '*.html' | sort_relative "$DST")
+ # if dst_pages is empty do nothing
+ if test -z "$dst_pages"; then return; fi
+ # if no pages added or removed do nothing
+ if test -f "$DST/.ssg.dst"; then
+ dst_pages_was=$(cut_sort <"$DST/.ssg.dst" | grep '.html$')
+ dst_pages_updated=$(diff_lines "$dst_pages_was" "$dst_pages")
+ if test -z "$dst_pages_updated"; then return; fi
+ fi
+ # generate sitemap.xml for all pages in dst
+ {
+ site=$(basename "$SRC")
+ echo '<?xml version="1.0" encoding="UTF-8"?>
+<urlset
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
+ http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
+ echo "$dst_pages" | sed -E '
+ s,^$,,
+ s,^'"$DST"',,
+ s,index.html$,,
+ s,^(.*)$, <url><loc>https://'"$site"'/\1</loc></url>,'
+ echo '</urlset>'
+ } >"$DST/$SSG_SITEMAP_XML"
+ info "sitemap $SSG_SITEMAP_XML"
+
+ # if robots.txt found in src do nothing
+ if test -f "$SRC/$SSG_ROBOTS_TXT"; then return; fi
+ # generate robots.txt in dst
+ echo 'user-agent: *
+sitemap: https://'"$site"'/sitemap.xml' >"$DST/$SSG_ROBOTS_TXT"
+ info "sitemap $SSG_ROBOTS_TXT"
+}
+
# write files in dst directory
generate() {
while read -r k f; do
*) info "unknown $f" ;;
esac
done
+ generate_sitemap "$1"
}
# write src and dst hash files to dst directory
SSG_TEMPLATE='.ssg.template'
SSG_SRC='.ssg.src'
SSG_DST='.ssg.dst'
+ SSG_SITEMAP_XML='sitemap.xml'
+ SSG_ROBOTS_TXT='robots.txt'
NCPU=$(sysctl -n hw.ncpu 2>/dev/null || getconf NPROCESSORS_ONLN)
src_hash=$(hash_src)
- select_src_files "$src_hash" | prepend_kind | generate
+ select_src_files "$src_hash" | prepend_kind | generate "$src_hash"
write_hashes "$src_hash" "$(hash_dst)"
}
blob - 81b21fe0ed41b084f56c25688252b3e98b8f95b8
blob + 6a771627ed42f0433cd24383fd758424703d17ca
--- ssg.test.sh
+++ ssg.test.sh
#!/bin/ksh -e
ok_count=0
-ok_expected=24
+ok_expected=27
plan() {
echo "$ok_expected..$ok_count"
file main.css > main.css.gz
md markdown.md, .ssg.template > markdown.html
md markdown.md, .ssg.template > markdown.html.gz
-56fd1f7d1f2bcbe8b452073cc657b27a4eee72cab0e531de679788e5744af652
+sitemap sitemap.xml
+sitemap robots.txt
+de31b4842cafa9761c3bb57c9d60b7651e93f9606dbb5a40f760caefda2f34ea
'
rm -rf "$dir"
}
md markdown.md, .ssg.template > markdown.html
md markdown.md, .ssg.template > markdown.html.gz
copy t.png
-5538af7d3a6abc2aad8609b32f32154ea5d23109e2630399fecd0755ebde544f
+sitemap sitemap.xml
+sitemap robots.txt
+e58355d68978971708283c3099a1f15c2f4514964e1456af3a2c554db93af090
'
+
expected_dst='
.ssg.dst
.ssg.src
html2.html.gz
markdown.html
markdown.html.gz
+robots.txt
+sitemap.xml
t.png
'
not_ok_find "$dst" "$1" "$expected_dst"
html html1.html, .ssg.template > html1.html.gz
md markdown.md, .ssg.template > markdown.html
md markdown.md, .ssg.template > markdown.html.gz
-c3f5b69d372ad66211a052e7b572715941522aae7779eff22cf7becc1c8d18e8
+cc2f8fab08e743666d490c9379e8580981115d95c3b6492893a46b5cd30f5f3a
'
not_ok_find "$dst" "$1" "$expected_dst"
;;
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" '
html h.html
html h.html > h.html.gz
-d167244df661961bfe78dd5e1b2c8c563cd588f3583a6438f2af3207401fb10c
+sitemap sitemap.xml
+sitemap robots.txt
+c31554e49bd5671f634ec9392a21ded395383d00bf224088767fd2fc64a42486
'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
-d167244df661961bfe78dd5e1b2c8c563cd588f3583a6438f2af3207401fb10c
+c31554e49bd5671f634ec9392a21ded395383d00bf224088767fd2fc64a42486
'
not_ok_find "$dst" "$1" '
.ssg.dst
.ssg.src
h.html
h.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h.html" | not_ok_diff "$1" '<html>'
hexdump -C "$dst/h.html.gz" | not_ok_diff_n "$1" '
'
;;
+ generate_sitemap)
+ mkdir "$src" "$dst" && echo '<html>' >"$src/h.html"
+ "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" '
+html h.html
+html h.html > h.html.gz
+sitemap sitemap.xml
+sitemap robots.txt
+c31554e49bd5671f634ec9392a21ded395383d00bf224088767fd2fc64a42486
+'
+ cat "$dst/sitemap.xml" | not_ok_diff_n "$1" '
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
+ http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+ <url><loc>https://src/h.html</loc></url>
+</urlset>
+'
+ rm "$src/h.html"
+
+ "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
+rm h.html
+rm h.html.gz
+rm robots.txt
+rm sitemap.xml
+01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
+'
+
+ not_ok_find "$dst" "$1" '
+.ssg.dst
+.ssg.src
+'
+ ;;
+
+ generate_sitemap_xml_found_in_src)
+ mkdir "$src" "$dst"
+ echo '<html>' >"$src/h.html"
+ echo >"$src/sitemap.xml"
+ "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" '
+html h.html
+html h.html > h.html.gz
+file sitemap.xml
+file sitemap.xml > sitemap.xml.gz
+8ff598b31385c53268c54ff343e33ff60bbf0605d5efcd4f7c5f84a395eaaaa4
+'
+ cat "$dst/sitemap.xml" | not_ok_diff "$1" ''
+ ;;
+
+ generate_sitemap_robots_txt_found_in_src)
+ mkdir "$src" "$dst"
+ echo '<html>' >"$src/h.html"
+ echo >"$src/robots.txt"
+ "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" '
+html h.html
+html h.html > h.html.gz
+file robots.txt
+file robots.txt > robots.txt.gz
+sitemap sitemap.xml
+380bbf740dad47d4036e88e89a07b7a1d1f94657ca5bcf9e5f10f1feddd8c799
+'
+ cat "$dst/robots.txt" | not_ok_diff "$1" ''
+ ;;
+
generate_html_with_template)
mkdir "$src" "$dst"
echo '<h1>h1</h1>' >"$src/h.html"
template .ssg.template
html h.html, .ssg.template > h.html
html h.html, .ssg.template > h.html.gz
-7cbe380c112e232fa4b618b1837d11c47abdfd01863ae484fdb411ade0f43af1
+sitemap sitemap.xml
+sitemap robots.txt
+41ea28f2ec31b10d054cd52d4f5586974b37f2028b3841fa8bd4f1821d17d1fe
'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
-7cbe380c112e232fa4b618b1837d11c47abdfd01863ae484fdb411ade0f43af1
+41ea28f2ec31b10d054cd52d4f5586974b37f2028b3841fa8bd4f1821d17d1fe
'
not_ok_find "$dst" "$1" '
.ssg.dst
.ssg.src
h.html
h.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h.html" | not_ok_diff "$1" '<title>h1~src</title><h1>h1</h1>'
html h.html, .ssg.template > h.html.gz
html p.html, .ssg.template > p.html
html p.html, .ssg.template > p.html.gz
-816fcd36b20ed6114d6e13c15182ae33186676aa091558dce61f66c8e43c8b62
+sitemap sitemap.xml
+sitemap robots.txt
+452338ffd109bbe64021f7722cce4beb854494f37298edd08da1b0e484d0e7dd
'
not_ok_find "$dst" "$1" '
.ssg.dst
h.html.gz
p.html
p.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h.html" | not_ok_diff "$1" '<title>h1: src</title><h1>h1</h1>'
cat "$dst/p.html" | not_ok_diff "$1" '<title>src</title>p'
;;
-
generate_html_with_template_in_dir)
mkdir "$src" "$src/dir"
echo >"$src/h1.html"
html dir/h2.html, dir/.ssg.template > dir/h2.html.gz
html h1.html, .ssg.template > h1.html
html h1.html, .ssg.template > h1.html.gz
-8602b68b3b149d967e43d9c4948099d4cbbc3edd2f0a5e46f01c37af78ba8506
+sitemap sitemap.xml
+sitemap robots.txt
+51147e86d5a634da68279934469c49305735a5a0516b0b6327fb00df86795832
'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
-8602b68b3b149d967e43d9c4948099d4cbbc3edd2f0a5e46f01c37af78ba8506
+51147e86d5a634da68279934469c49305735a5a0516b0b6327fb00df86795832
'
not_ok_find "$dst" "$1" '
.ssg.dst
dir/h2.html.gz
h1.html
h1.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h1.html" | not_ok_diff "$1" '/'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" '
html h.html
html h.html > h.html.gz
-255d20dac0c5587bd5499827d3528db1433d60c04168ca2d9b2427de0f9a440e
+sitemap sitemap.xml
+sitemap robots.txt
+52494b82f46c80147bde275b53bf7318998d6986eaf6e503d7fe3dadfdf67d19
'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
-255d20dac0c5587bd5499827d3528db1433d60c04168ca2d9b2427de0f9a440e
+52494b82f46c80147bde275b53bf7318998d6986eaf6e503d7fe3dadfdf67d19
'
not_ok_find "$dst" "$1" '
.ssg.dst
.ssg.src
h.html
h.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h.html" | not_ok_diff "$1" '<h1>h1</h1>'
template .ssg.template
md h.md, .ssg.template > h.html
md h.md, .ssg.template > h.html.gz
-916917ec6944394281526a5ba9276e39bdff828105b965188c168c847065be63
+sitemap sitemap.xml
+sitemap robots.txt
+ea62f877148817dcb0bf8b1d76e691880cd58961f239f9e2e25f282c79da26e6
'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
-916917ec6944394281526a5ba9276e39bdff828105b965188c168c847065be63
+ea62f877148817dcb0bf8b1d76e691880cd58961f239f9e2e25f282c79da26e6
'
not_ok_find "$dst" "$1" '
.ssg.dst
.ssg.src
h.html
h.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h.html" |
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" '
md h.md > h.html
md h.md > h.html.gz
-2f33f1e74aa7d6f33502cc842f2001a7759da120f0fb0f84a6295c1fe81d0319
+sitemap sitemap.xml
+sitemap robots.txt
+541864f1b492230aa29853b08cf13533054817db9b19bc75ebd47201e04bd470
'
"$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" '
-2f33f1e74aa7d6f33502cc842f2001a7759da120f0fb0f84a6295c1fe81d0319
+541864f1b492230aa29853b08cf13533054817db9b19bc75ebd47201e04bd470
'
not_ok_find "$dst" "$1" '
.ssg.dst
.ssg.src
h.html
h.html.gz
+robots.txt
+sitemap.xml
'
cat "$dst/h.html" | not_ok_diff "$1" '<h1 id="h1">h1</h1>'
t generate_md_template_not_found
t generate_sh
t generate_sh_with_collision
+t generate_sitemap
+t generate_sitemap_xml_found_in_src
+t generate_sitemap_robots_txt_found_in_src
t write_hashes
basic_case && bench 4 basic_case