commit 4912716080cc981b9e10048c4c0493c1c80487b3 from: roman zolotarev date: Mon Jan 12 23:35:26 2026 UTC add: generate_sitemap() commit - b77b250a6fbc0ecf95bf91dff9ec25f6a5c4260b commit + 4912716080cc981b9e10048c4c0493c1c80487b3 blob - a785d7f2fd619836522e66ce6d8d616c7fc05ed4 blob + e769f117de025d598abc67114ea1168eb6bba24d --- ssg.sh +++ ssg.sh @@ -260,6 +260,17 @@ pages_by_templates() { while read -r t; do echo "$1" | grep "$t" | cut -f2; done } +plan_sitemap() { + # if src_hash is empty do nothing + if test -z "$1"; then return; fi + # if sitemap.xml found in src do nothing + if test -f "$SRC/$SSG_SITEMAP_XML"; then return; fi + echo "$SSG_SITEMAP_XML" + # if robots.txt found in src do nothing + if test -f "$SRC/$SSG_ROBOTS_TXT"; then return; fi + echo "$SSG_ROBOTS_TXT" +} + # return file expected in dst directory plan() { while read -r k f; do @@ -275,6 +286,7 @@ plan() { *) continue ;; esac done + plan_sitemap "$1" } # make dst directory and return src hash as is @@ -317,7 +329,7 @@ select_right() { sed -n 's/^> \([^\ ]*\).*/\1/p'; } # remove files and directories not present in plan from dst clean_up_dst() { - dst_plan=$(echo "$1" | cut_sort | prepend_kind | plan | sort) + dst_plan=$(echo "$1" | cut_sort | prepend_kind | plan "$1" | sort) dst_files=$(echo "$2" | sort_relative "$DST" | cut_sort) diff_lines "$dst_plan" "$dst_files" | select_right | files_in "$DST" | rm_files @@ -345,10 +357,13 @@ diff_src() { diff_lines "$(cat "$DST/$SSG_SRC")" "$1"; # return files to be updated select_src_files() { - if is_empty "$1"; then return; fi + dst_hash=$(hash_dst) + if is_empty "$1"; then + clean_up_dst "$src_hash" "$dst_hash" + return + fi if ! is_dir "$DST"; then mkdir_select_all "$1" && return; fi if ! is_ssg_src || ! is_ssg_dst; then rmdir_select_all "$1" && return; fi - dst_hash=$(hash_dst) if ! is_matching_ssg_dst "$dst_hash"; then rmdir_select_all "$1" && return; fi src_hash_diff=$(diff_src "$1") if is_empty "$src_hash_diff"; then return; fi @@ -356,6 +371,47 @@ select_src_files() { select_updated "$src_hash" "$src_hash_diff" } +# write sitemap to dst +generate_sitemap() { + # if src_hash is empty do nothing + if test -z "$1"; then return; fi + # if sitemap.xml found in src do nothing + if test -f "$SRC/$SSG_SITEMAP_XML"; then return; fi + dst_pages=$(find "$DST" -type f -name '*.html' | sort_relative "$DST") + # if dst_pages is empty do nothing + if test -z "$dst_pages"; then return; fi + # if no pages added or removed do nothing + if test -f "$DST/.ssg.dst"; then + dst_pages_was=$(cut_sort <"$DST/.ssg.dst" | grep '.html$') + dst_pages_updated=$(diff_lines "$dst_pages_was" "$dst_pages") + if test -z "$dst_pages_updated"; then return; fi + fi + # generate sitemap.xml for all pages in dst + { + site=$(basename "$SRC") + echo ' +' + echo "$dst_pages" | sed -E ' + s,^$,, + s,^'"$DST"',, + s,index.html$,, + s,^(.*)$, https://'"$site"'/\1,' + echo '' + } >"$DST/$SSG_SITEMAP_XML" + info "sitemap $SSG_SITEMAP_XML" + + # if robots.txt found in src do nothing + if test -f "$SRC/$SSG_ROBOTS_TXT"; then return; fi + # generate robots.txt in dst + echo 'user-agent: * +sitemap: https://'"$site"'/sitemap.xml' >"$DST/$SSG_ROBOTS_TXT" + info "sitemap $SSG_ROBOTS_TXT" +} + # write files in dst directory generate() { while read -r k f; do @@ -370,6 +426,7 @@ generate() { *) info "unknown $f" ;; esac done + generate_sitemap "$1" } # write src and dst hash files to dst directory @@ -389,10 +446,12 @@ main() { SSG_TEMPLATE='.ssg.template' SSG_SRC='.ssg.src' SSG_DST='.ssg.dst' + SSG_SITEMAP_XML='sitemap.xml' + SSG_ROBOTS_TXT='robots.txt' NCPU=$(sysctl -n hw.ncpu 2>/dev/null || getconf NPROCESSORS_ONLN) src_hash=$(hash_src) - select_src_files "$src_hash" | prepend_kind | generate + select_src_files "$src_hash" | prepend_kind | generate "$src_hash" write_hashes "$src_hash" "$(hash_dst)" } blob - 81b21fe0ed41b084f56c25688252b3e98b8f95b8 blob + 6a771627ed42f0433cd24383fd758424703d17ca --- ssg.test.sh +++ ssg.test.sh @@ -1,7 +1,7 @@ #!/bin/ksh -e ok_count=0 -ok_expected=24 +ok_expected=27 plan() { echo "$ok_expected..$ok_count" @@ -83,7 +83,9 @@ file main.css file main.css > main.css.gz md markdown.md, .ssg.template > markdown.html md markdown.md, .ssg.template > markdown.html.gz -56fd1f7d1f2bcbe8b452073cc657b27a4eee72cab0e531de679788e5744af652 +sitemap sitemap.xml +sitemap robots.txt +de31b4842cafa9761c3bb57c9d60b7651e93f9606dbb5a40f760caefda2f34ea ' rm -rf "$dir" } @@ -253,8 +255,11 @@ html html2.html > html2.html.gz md markdown.md, .ssg.template > markdown.html md markdown.md, .ssg.template > markdown.html.gz copy t.png -5538af7d3a6abc2aad8609b32f32154ea5d23109e2630399fecd0755ebde544f +sitemap sitemap.xml +sitemap robots.txt +e58355d68978971708283c3099a1f15c2f4514964e1456af3a2c554db93af090 ' + expected_dst=' .ssg.dst .ssg.src @@ -264,6 +269,8 @@ html2.html html2.html.gz markdown.html markdown.html.gz +robots.txt +sitemap.xml t.png ' not_ok_find "$dst" "$1" "$expected_dst" @@ -275,7 +282,7 @@ html html1.html, .ssg.template > html1.html html html1.html, .ssg.template > html1.html.gz md markdown.md, .ssg.template > markdown.html md markdown.md, .ssg.template > markdown.html.gz -c3f5b69d372ad66211a052e7b572715941522aae7779eff22cf7becc1c8d18e8 +cc2f8fab08e743666d490c9379e8580981115d95c3b6492893a46b5cd30f5f3a ' not_ok_find "$dst" "$1" "$expected_dst" ;; @@ -326,16 +333,20 @@ t.txt.gz "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" ' html h.html html h.html > h.html.gz -d167244df661961bfe78dd5e1b2c8c563cd588f3583a6438f2af3207401fb10c +sitemap sitemap.xml +sitemap robots.txt +c31554e49bd5671f634ec9392a21ded395383d00bf224088767fd2fc64a42486 ' "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' -d167244df661961bfe78dd5e1b2c8c563cd588f3583a6438f2af3207401fb10c +c31554e49bd5671f634ec9392a21ded395383d00bf224088767fd2fc64a42486 ' not_ok_find "$dst" "$1" ' .ssg.dst .ssg.src h.html h.html.gz +robots.txt +sitemap.xml ' cat "$dst/h.html" | not_ok_diff "$1" '' hexdump -C "$dst/h.html.gz" | not_ok_diff_n "$1" ' @@ -345,6 +356,70 @@ h.html.gz ' ;; + generate_sitemap) + mkdir "$src" "$dst" && echo '' >"$src/h.html" + "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" ' +html h.html +html h.html > h.html.gz +sitemap sitemap.xml +sitemap robots.txt +c31554e49bd5671f634ec9392a21ded395383d00bf224088767fd2fc64a42486 +' + cat "$dst/sitemap.xml" | not_ok_diff_n "$1" ' + + + https://src/h.html + +' + rm "$src/h.html" + + "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' +rm h.html +rm h.html.gz +rm robots.txt +rm sitemap.xml +01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b +' + + not_ok_find "$dst" "$1" ' +.ssg.dst +.ssg.src +' + ;; + + generate_sitemap_xml_found_in_src) + mkdir "$src" "$dst" + echo '' >"$src/h.html" + echo >"$src/sitemap.xml" + "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" ' +html h.html +html h.html > h.html.gz +file sitemap.xml +file sitemap.xml > sitemap.xml.gz +8ff598b31385c53268c54ff343e33ff60bbf0605d5efcd4f7c5f84a395eaaaa4 +' + cat "$dst/sitemap.xml" | not_ok_diff "$1" '' + ;; + + generate_sitemap_robots_txt_found_in_src) + mkdir "$src" "$dst" + echo '' >"$src/h.html" + echo >"$src/robots.txt" + "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" ' +html h.html +html h.html > h.html.gz +file robots.txt +file robots.txt > robots.txt.gz +sitemap sitemap.xml +380bbf740dad47d4036e88e89a07b7a1d1f94657ca5bcf9e5f10f1feddd8c799 +' + cat "$dst/robots.txt" | not_ok_diff "$1" '' + ;; + generate_html_with_template) mkdir "$src" "$dst" echo '

h1

' >"$src/h.html" @@ -354,16 +429,20 @@ h.html.gz template .ssg.template html h.html, .ssg.template > h.html html h.html, .ssg.template > h.html.gz -7cbe380c112e232fa4b618b1837d11c47abdfd01863ae484fdb411ade0f43af1 +sitemap sitemap.xml +sitemap robots.txt +41ea28f2ec31b10d054cd52d4f5586974b37f2028b3841fa8bd4f1821d17d1fe ' "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' -7cbe380c112e232fa4b618b1837d11c47abdfd01863ae484fdb411ade0f43af1 +41ea28f2ec31b10d054cd52d4f5586974b37f2028b3841fa8bd4f1821d17d1fe ' not_ok_find "$dst" "$1" ' .ssg.dst .ssg.src h.html h.html.gz +robots.txt +sitemap.xml ' cat "$dst/h.html" | not_ok_diff "$1" 'h1~src

h1

' @@ -387,7 +466,9 @@ html h.html, .ssg.template > h.html html h.html, .ssg.template > h.html.gz html p.html, .ssg.template > p.html html p.html, .ssg.template > p.html.gz -816fcd36b20ed6114d6e13c15182ae33186676aa091558dce61f66c8e43c8b62 +sitemap sitemap.xml +sitemap robots.txt +452338ffd109bbe64021f7722cce4beb854494f37298edd08da1b0e484d0e7dd ' not_ok_find "$dst" "$1" ' .ssg.dst @@ -396,13 +477,14 @@ h.html h.html.gz p.html p.html.gz +robots.txt +sitemap.xml ' cat "$dst/h.html" | not_ok_diff "$1" 'h1: src

h1

' cat "$dst/p.html" | not_ok_diff "$1" 'srcp' ;; - generate_html_with_template_in_dir) mkdir "$src" "$src/dir" echo >"$src/h1.html" @@ -417,10 +499,12 @@ html dir/h2.html, dir/.ssg.template > dir/h2.html html dir/h2.html, dir/.ssg.template > dir/h2.html.gz html h1.html, .ssg.template > h1.html html h1.html, .ssg.template > h1.html.gz -8602b68b3b149d967e43d9c4948099d4cbbc3edd2f0a5e46f01c37af78ba8506 +sitemap sitemap.xml +sitemap robots.txt +51147e86d5a634da68279934469c49305735a5a0516b0b6327fb00df86795832 ' "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' -8602b68b3b149d967e43d9c4948099d4cbbc3edd2f0a5e46f01c37af78ba8506 +51147e86d5a634da68279934469c49305735a5a0516b0b6327fb00df86795832 ' not_ok_find "$dst" "$1" ' .ssg.dst @@ -429,6 +513,8 @@ dir/h2.html dir/h2.html.gz h1.html h1.html.gz +robots.txt +sitemap.xml ' cat "$dst/h1.html" | not_ok_diff "$1" '/' @@ -442,16 +528,20 @@ h1.html.gz "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" ' html h.html html h.html > h.html.gz -255d20dac0c5587bd5499827d3528db1433d60c04168ca2d9b2427de0f9a440e +sitemap sitemap.xml +sitemap robots.txt +52494b82f46c80147bde275b53bf7318998d6986eaf6e503d7fe3dadfdf67d19 ' "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' -255d20dac0c5587bd5499827d3528db1433d60c04168ca2d9b2427de0f9a440e +52494b82f46c80147bde275b53bf7318998d6986eaf6e503d7fe3dadfdf67d19 ' not_ok_find "$dst" "$1" ' .ssg.dst .ssg.src h.html h.html.gz +robots.txt +sitemap.xml ' cat "$dst/h.html" | not_ok_diff "$1" '

h1

' @@ -483,16 +573,20 @@ fail: h.md collides with h.html template .ssg.template md h.md, .ssg.template > h.html md h.md, .ssg.template > h.html.gz -916917ec6944394281526a5ba9276e39bdff828105b965188c168c847065be63 +sitemap sitemap.xml +sitemap robots.txt +ea62f877148817dcb0bf8b1d76e691880cd58961f239f9e2e25f282c79da26e6 ' "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' -916917ec6944394281526a5ba9276e39bdff828105b965188c168c847065be63 +ea62f877148817dcb0bf8b1d76e691880cd58961f239f9e2e25f282c79da26e6 ' not_ok_find "$dst" "$1" ' .ssg.dst .ssg.src h.html h.html.gz +robots.txt +sitemap.xml ' cat "$dst/h.html" | @@ -513,16 +607,20 @@ h.html.gz "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: first run" ' md h.md > h.html md h.md > h.html.gz -2f33f1e74aa7d6f33502cc842f2001a7759da120f0fb0f84a6295c1fe81d0319 +sitemap sitemap.xml +sitemap robots.txt +541864f1b492230aa29853b08cf13533054817db9b19bc75ebd47201e04bd470 ' "$cmd" "$src" "$dst" 2>&1 | not_ok_diff_n "$1: second run" ' -2f33f1e74aa7d6f33502cc842f2001a7759da120f0fb0f84a6295c1fe81d0319 +541864f1b492230aa29853b08cf13533054817db9b19bc75ebd47201e04bd470 ' not_ok_find "$dst" "$1" ' .ssg.dst .ssg.src h.html h.html.gz +robots.txt +sitemap.xml ' cat "$dst/h.html" | not_ok_diff "$1" '

h1

' @@ -629,6 +727,9 @@ t generate_md_with_template t generate_md_template_not_found t generate_sh t generate_sh_with_collision +t generate_sitemap +t generate_sitemap_xml_found_in_src +t generate_sitemap_robots_txt_found_in_src t write_hashes basic_case && bench 4 basic_case