From 15e3b26be4ce27136f99ca8bf4c390e8b0f43aef Mon Sep 17 00:00:00 2001
From: Yuriy Yakym
Date: Thu, 15 Oct 2020 02:50:54 +0200
Subject: [PATCH] Non-minified sitemaps support

---
 sitemap-urls.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sitemap-urls.sh b/sitemap-urls.sh
index e9acd2e..4e93e5e 100755
--- a/sitemap-urls.sh
+++ b/sitemap-urls.sh
@@ -1,17 +1,17 @@
 #!/bin/bash
 
-# $1 - xml url
 parse_xml() {
-  urls=`curl -s $1 | sed -n 's/^.*<loc>\(.*\)<\/loc>.*$/\1/p'`
-  xmls=(`grep -e ".xml$" <<< $urls`)
+  xml=`curl -s $1`
+  locations=$(tr '\n' ' ' <<< "$xml" | grep -oP "(?<=<loc>)(.*?)(?=</loc>)")
+  sub_xmls=(`grep -e ".xml$" <<< $locations`)
+  pages=(`grep -v -e ".xml$" <<< $locations`)
 
-  for xml_url in "${xmls[@]}"
+  printf '%s\n' "${pages[@]}" >&1
+
+  for xml_url in "${sub_xmls[@]}"
   do
     parse_xml $xml_url
   done
-
-  pages=(`grep -v -e ".xml$" <<< $urls`)
-  printf '%s\n' "${pages[@]}" >&1
 }
 
 parse_xml $1