Improved performance and portability by storing the blocklist in a temporary file

This commit is contained in:
Héctor Molinero Fernández 2018-06-24 23:56:49 +02:00
commit 60565f4d7a
5 changed files with 215 additions and 140 deletions

View file

@ -89,13 +89,13 @@ dist/hosts_windows.zip:
stats: stats-tlds stats-suffixes stats: stats-tlds stats-suffixes
stats-tlds: build-hosts dist/most_abused_tlds.txt stats-tlds: build-domains dist/most_abused_tlds.txt
dist/most_abused_tlds.txt: dist/most_abused_tlds.txt:
"$(MKFILE_DIR)"/resources/stats/suffix.sh dist/hosts none > dist/most_abused_tlds.txt "$(MKFILE_DIR)"/resources/stats/suffix.sh dist/hosts_domains.txt none > dist/most_abused_tlds.txt
stats-suffixes: build-hosts dist/most_abused_suffixes.txt stats-suffixes: build-domains dist/most_abused_suffixes.txt
dist/most_abused_suffixes.txt: dist/most_abused_suffixes.txt:
"$(MKFILE_DIR)"/resources/stats/suffix.sh dist/hosts > dist/most_abused_suffixes.txt "$(MKFILE_DIR)"/resources/stats/suffix.sh dist/hosts_domains.txt > dist/most_abused_suffixes.txt
index: build-hosts dist/index.html index: build-hosts dist/index.html
dist/index.html: dist/index.html:

253
hblock
View file

@ -11,7 +11,18 @@ export LC_ALL=C
# shellcheck disable=SC2039 # shellcheck disable=SC2039
HOSTNAME=${HOSTNAME-$(uname -n)} HOSTNAME=${HOSTNAME-$(uname -n)}
# Methods # Check if a program exists
checkCommand() {
	# Succeeds when "$1" resolves to a command; all lookup output is discarded
	# and the status of "command -v" is returned to the caller.
	command -v -- "$1" 2>/dev/null 1>&2
}
# Escape strings in sed
# See: https://stackoverflow.com/a/29613573
# quoteRe: print $1 rewritten for use as a literal sed regular expression.
# Every character other than "^" is wrapped in a bracket expression ("a"
# becomes "[a]") and "^" becomes "\^"; tr then removes all newline characters
# from the result.
# NOTE(review): "$(printf '\n')" expands to an empty string because command
# substitution strips trailing newlines, so the "$!a" append command may not
# receive the line break the linked answer relies on - confirm behavior with
# multi-line input.
quoteRe() { printf -- '%s' "$1" | sed -e 's/[^^]/[&]/g; s/\^/\\^/g; $!a'\\''"$(printf '\n')"'\\n' | tr -d '\n'; }
# quoteSubst: print $1 escaped for use as the replacement part of a sed "s"
# command. All input lines are first joined in the pattern space (the ":a" /
# "$!{N;ba}" loop); then "&", "/", "\" and every embedded newline are prefixed
# with a backslash.
quoteSubst() { printf -- '%s' "$1" | sed -e ':a' -e '$!{N;ba' -e '}' -e 's/[&/\]/\\&/g; s/\n/\\&/g'; }
# Map a boolean flag to a human-readable word:
# prints "yes" when $1 is exactly "true", "no" for anything else
# (no trailing newline is printed; the word is also left in the variable s)
getBoolVal() {
	case "$1" in
		true) s='yes' ;;
		*) s='no' ;;
	esac
	printf -- '%s' "$s"
}
# Print to stdout if quiet mode is not enabled
printStdout() { printStdout() {
if [ "$quiet" != true ]; then if [ "$quiet" != true ]; then
# shellcheck disable=SC2059 # shellcheck disable=SC2059
@ -19,15 +30,18 @@ printStdout() {
fi fi
} }
# Print to stderr
printStderr() { printStderr() {
# shellcheck disable=SC2059 # shellcheck disable=SC2059
>&2 printf -- "$@" >&2 printf -- "$@"
} }
# Print informational message
logInfo() { logInfo() {
printStdout ' - %s\n' "$@" printStdout ' - %s\n' "$@"
} }
# Print action message
logAction() { logAction() {
if [ "$color" = true ]; then if [ "$color" = true ]; then
printStdout '\033[1;33m + \033[1;32m%s \033[0m\n' "$@" printStdout '\033[1;33m + \033[1;32m%s \033[0m\n' "$@"
@ -36,6 +50,7 @@ logAction() {
fi fi
} }
# Print error message
logError() { logError() {
if [ "$color" = true ]; then if [ "$color" = true ]; then
printStderr '\033[1;33m + \033[1;31m%s \033[0m\n' "$@" printStderr '\033[1;33m + \033[1;31m%s \033[0m\n' "$@"
@ -44,38 +59,33 @@ logError() {
fi fi
} }
getBoolVal() { # Create temporary file
[ "$1" = true ] && s='yes' || s='no' createTempFile() {
printf -- '%s' "$s" if checkCommand mktemp; then mktemp
} else # Since POSIX does not specify mktemp utility, a counter is used as a fallback
tempCounter=${tempCounter:-9999}
checkBinary() { tempFile="${TMPDIR:-/tmp}/hblock.$((tempCounter+=1))"
command -v -- "$@" >/dev/null 2>&1 rm -f -- "$tempFile" && touch -- "$tempFile"
printf -- '%s\n' "$tempFile"
fi
} }
# Print to stdout the contents of a URL
fetchUrl() { fetchUrl() {
# If the protocol is "file://" we can omit the download and simply use cat
if [ "${1#file://}" != "$1" ]; then cat -- "${1#file://}"
else
userAgent='Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0' userAgent='Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
if checkBinary curl; then if checkCommand curl; then curl -fsSL -A "$userAgent" -- "$1";
curl -fsSL -A "$userAgent" -- "$@" elif checkCommand wget; then wget -qO- -U "$userAgent" -- "$1";
else else
wget -qO- -U "$userAgent" -- "$@" logError 'Either wget or curl are required for this script'
fi exit 1
} fi
writeFile() {
if [ -d "$2" ]; then
logError "Cannot write '$2': is a directory"
exit 1
elif ([ -e "$2" ] && [ -w "$2" ]) || touch -- "$2" >/dev/null 2>&1; then
printf -- '%s\n' "$1" | tee -- "$2" >/dev/null
elif checkBinary sudo; then
printf -- '%s\n' "$1" | sudo tee -- "$2" >/dev/null
else
logError "Cannot write '$2': permission denied"
exit 1
fi fi
} }
# Show help and quit
showHelp() { showHelp() {
if [ $# -eq 0 ]; then if [ $# -eq 0 ]; then
printStdout '%s\n' "$(cat <<-'EOF' printStdout '%s\n' "$(cat <<-'EOF'
@ -136,6 +146,7 @@ showHelp() {
fi fi
} }
# Show version number and quit
showVersion() { showVersion() {
printStdout '%s\n' '1.6.0' printStdout '%s\n' '1.6.0'
exit 0 exit 0
@ -155,6 +166,7 @@ main() {
ff02::3 ip6-allhosts ff02::3 ip6-allhosts
EOF EOF
) )
custom=''
footer='' footer=''
template='\2 \1' template='\2 \1'
comment='#' comment='#'
@ -198,42 +210,42 @@ main() {
quiet=false quiet=false
# Transform long options to short ones # Transform long options to short ones
for opt in "$@"; do for opt in "${@-}"; do
shift shift
case "$opt" in case "$opt" in
'--output') set -- "$@" '-O' ;; '--output') set -- "${@-}" '-O' ;;
'--redirection') set -- "$@" '-R' ;; '--redirection') set -- "${@-}" '-R' ;;
'--header') set -- "$@" '-H' ;; '--header') set -- "${@-}" '-H' ;;
'--footer') set -- "$@" '-F' ;; '--footer') set -- "${@-}" '-F' ;;
'--template') set -- "$@" '-T' ;; '--template') set -- "${@-}" '-T' ;;
'--comment') set -- "$@" '-C' ;; '--comment') set -- "${@-}" '-C' ;;
'--sources') set -- "$@" '-S' ;; '--sources') set -- "${@-}" '-S' ;;
'--whitelist') set -- "$@" '-W' ;; '--whitelist') set -- "${@-}" '-W' ;;
'--blacklist') set -- "$@" '-B' ;; '--blacklist') set -- "${@-}" '-B' ;;
'--backup') set -- "$@" '-b' ;; '--backup') set -- "${@-}" '-b' ;;
'--lenient') set -- "$@" '-l' ;; '--lenient') set -- "${@-}" '-l' ;;
'--ignore-download-error') set -- "$@" '-i' ;; '--ignore-download-error') set -- "${@-}" '-i' ;;
'--color') set -- "$@" '-c' ;; '--color') set -- "${@-}" '-c' ;;
'--quiet') set -- "$@" '-q' ;; '--quiet') set -- "${@-}" '-q' ;;
'--version') set -- "$@" '-v' ;; '--version') set -- "${@-}" '-v' ;;
'--help') set -- "$@" '-h' ;; '--help') set -- "${@-}" '-h' ;;
*) set -- "$@" "$opt" *) set -- "${@-}" "$opt"
esac esac
done done
# Set omitted arguments to empty strings # Set omitted arguments to empty strings
for opt in "$@"; do for opt in "${@-}"; do
shift shift
case "$opt" in case "$opt" in
-*b) -*b)
if a="$*"; [ -z "$a" ] || [ "${a#\-}x" != "${a}x" ] if a="$*"; [ -z "$a" ] || [ "${a#\-}x" != "${a}x" ]
then set -- "$@" "$opt" '' then set -- "${@-}" "$opt" ''
else set -- "$@" "$opt" else set -- "${@-}" "$opt"
fi fi
;; ;;
*) set -- "$@" "$opt" *) set -- "${@-}" "$opt"
esac esac
done done
# Read options # Read short options
OPTIND=1 OPTIND=1
while getopts ':O:R:H:F:T:C:S:W:B:b:lic:qvh-:' opt; do while getopts ':O:R:H:F:T:C:S:W:B:b:lic:qvh-:' opt; do
case "$opt" in case "$opt" in
@ -271,44 +283,45 @@ main() {
logInfo "Lenient: $(getBoolVal "$lenient")" logInfo "Lenient: $(getBoolVal "$lenient")"
logInfo "Ignore download error: $(getBoolVal "$ignoreDownloadError")" logInfo "Ignore download error: $(getBoolVal "$ignoreDownloadError")"
# Create temporary blocklist file
blocklist=$(createTempFile)
rmtemp() { rm -f -- "$blocklist" "$blocklist.aux"; }
trap rmtemp EXIT
logAction 'Downloading lists...' logAction 'Downloading lists...'
if ! checkBinary curl && ! checkBinary wget; then
logError 'Either wget or curl are required for this script'
exit 1
fi
blocklist=''
for url in $sources; do for url in $sources; do
logInfo "$url" logInfo "$url"
content=$(fetchUrl "$url") || true fetchUrl "$url" >> "$blocklist" && exitCode=0 || exitCode=$?
if [ "$exitCode" -ne 0 ] && [ "$ignoreDownloadError" != true ]; then
if [ -z "$content" ] && [ "$ignoreDownloadError" != true ]; then
logError 'Download failed' logError 'Download failed'
exit 1 exit 1
fi fi
blocklist=$(printf -- '%s\n%s' "$blocklist" "$content")
unset content
done done
logAction 'Parsing lists...' logAction 'Parsing lists...'
if [ -n "$blocklist" ]; then if [ -s "$blocklist" ]; then
logInfo 'Remove carriage return' logInfo 'Remove carriage return'
blocklist=$(printf -- '%s' "$blocklist" | tr -d '\r') tr -d '\r' \
< "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
logInfo 'Transform to lowercase' logInfo 'Transform to lowercase'
blocklist=$(printf -- '%s' "$blocklist" | tr '[:upper:]' '[:lower:]') tr '[:upper:]' '[:lower:]' \
< "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
logInfo 'Remove comments' logInfo 'Remove comments'
blocklist=$(printf -- '%s' "$blocklist" | sed 's/#.*//') sed -e 's/#.*//' \
-- "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
logInfo 'Trim spaces' logInfo 'Trim spaces'
blocklist=$(printf -- '%s' "$blocklist" | sed \ sed -e 's/^[[:blank:]]*//' \
-e 's/^[[:blank:]]*//' \ -e 's/[[:blank:]]*$//' \
-e 's/[[:blank:]]*$//' -- "$blocklist" > "$blocklist.aux" \
) && mv -f -- "$blocklist.aux" "$blocklist"
logInfo 'Match hosts lines' logInfo 'Match hosts lines'
if [ "$lenient" = true ]; then if [ "$lenient" = true ]; then
@ -322,25 +335,32 @@ main() {
ipRegex='\(0\.0\.0\.0\)\{0,1\}\(127\.0\.0\.1\)\{0,1\}' ipRegex='\(0\.0\.0\.0\)\{0,1\}\(127\.0\.0\.1\)\{0,1\}'
fi fi
domainRegex='\([0-9a-z_-]\{1,63\}\.\)\{1,\}[a-z][0-9a-z_-]\{1,62\}' domainRegex='\([0-9a-z_-]\{1,63\}\.\)\{1,\}[a-z][0-9a-z_-]\{1,62\}'
blocklist=$(printf -- '%s' "$blocklist" | sed -n "/^\\(${ipRegex}[[:blank:]]\\{1,\\}\\)\\{0,1\\}$domainRegex$/p") sed -n \
-e "/^\\(${ipRegex}[[:blank:]]\\{1,\\}\\)\\{0,1\\}$domainRegex$/p" \
-- "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
logInfo 'Remove reserved TLDs' logInfo 'Remove reserved TLDs'
blocklist=$(printf -- '%s' "$blocklist" | sed \ sed -e '/\.example$/d' \
-e '/\.example$/d' \
-e '/\.invalid$/d' \ -e '/\.invalid$/d' \
-e '/\.local$/d' \ -e '/\.local$/d' \
-e '/\.localdomain$/d' \ -e '/\.localdomain$/d' \
-e '/\.localhost$/d' \ -e '/\.localhost$/d' \
-e '/\.test$/d' -e '/\.test$/d' \
) -- "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
logInfo 'Remove destination IPs' logInfo 'Remove destination IPs'
blocklist=$(printf -- '%s' "$blocklist" | sed 's/^.\{1,\}[[:blank:]]\{1,\}//') sed -e 's/^.\{1,\}[[:blank:]]\{1,\}//' \
-- "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
if [ -n "$whitelist" ]; then if [ -n "$whitelist" ]; then
logInfo 'Apply whitelist' logInfo 'Apply whitelist'
for domain in $whitelist; do for domain in $whitelist; do
blocklist=$(printf -- '%s' "$blocklist" | sed "/$domain/d") sed -e "/$domain/d" \
-- "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
done done
fi fi
fi fi
@ -348,51 +368,55 @@ main() {
if [ -n "$blacklist" ]; then if [ -n "$blacklist" ]; then
logInfo 'Apply blacklist' logInfo 'Apply blacklist'
for domain in $blacklist; do for domain in $blacklist; do
blocklist=$(printf -- '%s\n%s' "$blocklist" "$domain") printf -- '%s\n' "$domain" >> "$blocklist"
done done
fi fi
# This domain is used to check if hBlock is enabled # This domain is used to check if hBlock is enabled
blocklist=$(printf -- '%s\n%s' "$blocklist" 'hblock-check.molinero.xyz') printf -- '%s\n' 'hblock-check.molinero.xyz' >> "$blocklist"
logInfo 'Sort entries' logInfo 'Sort entries'
blocklist=$(printf -- '%s' "$blocklist" | sort | uniq | sed '/^$/d') sort -- "$blocklist" | uniq | sed -e '/^$/d' > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
# Count blocked domains # Count blocked domains
blocklistCount=$([ -n "$blocklist" ] && printf -- '%s\n' "$blocklist" | wc -l | tr -d '[:blank:]' || printf '0') blocklistCount=$(wc -l -- "$blocklist" | cut -d' ' -f1)
logInfo 'Apply format template' logInfo 'Apply format template'
# Escape string literal for use as the replacement string in sed sed -e "s/$/\t$(quoteSubst "$redirection")/" \
escapedRedirection=$(printf -- '%s' "$redirection" | sed 's/[&/\]/\\&/g') -e "s/^\(.*\)\t\(.*\)$/$template/" \
blocklist=$(printf -- '%s' "$blocklist" | sed "s/$/\t$escapedRedirection/") -- "$blocklist" > "$blocklist.aux" \
blocklist=$(printf -- '%s' "$blocklist" | sed "s/^\(.*\)\t\(.*\)$/$template/") && mv -f -- "$blocklist.aux" "$blocklist"
# Define "C" variable for convenience # Define "C" variable for convenience
C=$comment C=$comment
if [ "$output" != - ] && [ -f "$output" ]; then if [ "$output" != - ] && [ -f "$output" ]; then
content=$(cat -- "$output")
# Get custom section # Get custom section
if [ -n "$C" ]; then if [ -n "$C" ]; then
logAction 'Reading custom section...' logAction 'Reading custom section...'
custom=$(printf -- '%s' "$content" | sed "/^$C.*<custom>/,/^$C.*<\/custom>/!d;/^$C.*<\(\/\|\)custom>/d") custom=$(sed -e "/^$C.*<custom>/,/^$C.*<\/custom>/!d;/^$C.*<\(\/\|\)custom>/d" -- "$output")
fi fi
# Backup procedure # Backup procedure
if [ "$backup" = true ]; then if [ "$backup" = true ]; then
logAction 'Backing up original file...' logAction 'Backing up original file...'
[ -z "$backupDir" ] && backupDir=$(dirname -- "$output") [ -z "$backupDir" ] && backupDir=$(dirname -- "$output")
writeFile "$content" "$backupDir/$(basename -- "$output").$(date +%s).bak" backupOutput="$backupDir/$(basename -- "$output").$(date +%s).bak"
if touch -- "$backupOutput" >/dev/null 2>&1; then
cp -af -- "$output" "$backupOutput"
elif checkCommand sudo; then
sudo cp -af -- "$output" "$backupOutput"
else
logError "Cannot write '$backupOutput': permission denied"
exit 1
fi
fi fi
unset content
fi fi
logAction 'Generating output file...' logAction 'Generating output file...'
# Output file printOutputFile() {
printBanner() {
if [ -n "$C" ]; then if [ -n "$C" ]; then
printf -- '%s\n' "$(cat <<-EOF printf -- '%s\n' "$(cat <<-EOF
$C Author: Héctor Molinero Fernández <hector@molinero.xyz> $C Author: Héctor Molinero Fernández <hector@molinero.xyz>
@ -402,29 +426,46 @@ main() {
EOF EOF
)" )"
fi fi
} if [ -n "$header" ]; then
printSection() { if [ -n "$C" ]; then printf -- '\n%s\n' "$C <header>"; fi
if [ -n "$2" ]; then printf -- '%s\n' "$header"
if [ -n "$C" ]; then printf -- '\n%s\n' "$C <$1>"; fi if [ -n "$C" ]; then printf -- '%s\n' "$C </header>"; fi
printf -- '%s\n' "$2" fi
if [ -n "$C" ]; then printf -- '%s\n' "$C </$1>"; fi if [ -n "$custom" ]; then
if [ -n "$C" ]; then printf -- '\n%s\n' "$C <custom>"; fi
printf -- '%s\n' "$custom"
if [ -n "$C" ]; then printf -- '%s\n' "$C </custom>"; fi
fi
if [ -n "$blocklist" ]; then
if [ -n "$C" ]; then printf -- '\n%s\n' "$C <blocklist>"; fi
cat -- "$blocklist"
if [ -n "$C" ]; then printf -- '%s\n' "$C </blocklist>"; fi
fi
if [ -n "$footer" ]; then
if [ -n "$C" ]; then printf -- '\n%s\n' "$C <footer>"; fi
printf -- '%s\n' "$footer"
if [ -n "$C" ]; then printf -- '%s\n' "$C </footer>"; fi
fi fi
} }
content=$(printBanner
printSection header "${header-}"
printSection custom "${custom-}"
printSection blocklist "${blocklist-}"
printSection footer "${footer-}"
)
# Print to stdout if the output value is equal to - # Print to stdout if the output value is equal to -
if [ "$output" = - ]; then if [ "$output" = - ]; then
printf -- '%s\n' "$content" printOutputFile
else else
writeFile "$content" "$output" if [ -d "$output" ]; then
logError "Cannot write '$output': is a directory"
exit 1
elif touch -- "$output" >/dev/null 2>&1; then
printOutputFile > "$output"
elif checkCommand sudo && checkCommand tee; then
printOutputFile | sudo tee -- "$output" >/dev/null
else
logError "Cannot write '$output': permission denied"
exit 1
fi
fi fi
logAction "$blocklistCount blocked domains!" logAction "$blocklistCount blocked domains!"
} }
main "$@" main "${@-}"

View file

@ -7,8 +7,32 @@
set -eu set -eu
export LC_ALL=C export LC_ALL=C
# Tell whether a program is available
#   $1: command name to look up
# Returns the status of "command -v"; nothing is printed.
checkCommand() {
	command -v -- "$1" 2>/dev/null 1>&2
}
# Create an empty temporary file and print its path to stdout
# Uses mktemp when it is available. Since POSIX does not specify a mktemp
# utility, the fallback builds a name from the shell PID plus a counter and
# creates the file atomically, so an existing file or a pre-planted symlink
# at that path is never reused or overwritten.
# Returns 1 (after printing a message to stderr) if no file could be created.
createTempFile() {
	if checkCommand mktemp; then
		mktemp
	else
		tempCounter=${tempCounter:-9999}
		tempAttempts=0
		while :; do
			tempCounter=$((tempCounter + 1))
			# The PID keeps names distinct across concurrent runs; the counter
			# alone is lost when this function runs inside $(...)
			tempFile="${TMPDIR:-/tmp}/hblock-stats.$$.$tempCounter"
			# With noclobber (set -C) the redirection fails if the path already
			# exists; the subshell keeps umask and noclobber from leaking out
			if (umask 077 && set -C && : > "$tempFile") 2>/dev/null; then
				break
			fi
			tempAttempts=$((tempAttempts + 1))
			if [ "$tempAttempts" -ge 100 ]; then
				printf -- '%s\n' "Cannot create temporary file in '${TMPDIR:-/tmp}'" >&2
				return 1
			fi
		done
		printf -- '%s\n' "$tempFile"
	fi
}
# Print to stdout the contents of a URL
#   $1: URL to download
# Prefers curl and falls back to wget. When neither is available, an error
# message is written to stderr and the script exits with status 1.
fetchUrl() {
	if checkCommand curl; then
		curl -fsSL -- "$1"
	elif checkCommand wget; then
		wget -qO- -- "$1"
	else
		# No logError helper is defined in the visible part of this script,
		# so the message is written to stderr directly
		printf -- '%s\n' 'Either wget or curl are required for this script' >&2
		exit 1
	fi
}
main() { main() {
file="${1:-/etc/hosts}" file="${1:?}"
publicSuffixList="${2:-https://publicsuffix.org/list/public_suffix_list.dat}" publicSuffixList="${2:-https://publicsuffix.org/list/public_suffix_list.dat}"
if ! [ -f "$file" ] || ! [ -r "$file" ]; then if ! [ -f "$file" ] || ! [ -r "$file" ]; then
@ -19,44 +43,49 @@ main() {
header=$(printf -- '%s\t%s\t%s\n' 'Top' 'Hosts' 'Suffix') header=$(printf -- '%s\t%s\t%s\n' 'Top' 'Hosts' 'Suffix')
stats='' stats=''
# Get blocklist content # Create temporary blocklist file
blocklist=$(cat -- "$file" | sed '/^#.*<blocklist>/,/^#.*<\/blocklist>/!d;/^\s*#.*$/d') blocklist=$(createTempFile)
cp -f -- "$file" "$blocklist"
rmtemp() { rm -f -- "$blocklist" "$blocklist".*; }
trap rmtemp EXIT
# Compact blocklist content (remove lowest level domain and count occurrences) # Compact blocklist content (remove lowest level domain and count occurrences)
blocklist=$(printf -- '%s' "$blocklist" | sed 's/^.\{1,\}[[:blank:]][^.]\{1,\}//' | sort | uniq -c) sed -e 's/^.\{1,\}[[:blank:]][^.]\{1,\}//' -- "$blocklist" \
| sort | uniq -c > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist"
if [ "$publicSuffixList" != 'none' ]; then if [ "$publicSuffixList" != 'none' ]; then
# Download public suffix list # Download public suffix list
suffixes=$(curl -fsSL -- "$publicSuffixList") curl -fsSL -- "$publicSuffixList" > "$blocklist.suffixes"
# Transform suffix list (punycode encode and sort by length in descending order) # Transform suffix list (punycode encode and sort by length in descending order)
suffixes=$(printf -- '%s' "$suffixes" | sed -e '/^\/\//d;/^!/d;/^$/d;s/^\*\.//g' -- "$blocklist.suffixes" \
sed '/^\/\//d;/^!/d;/^$/d;s/^\*\.//g' | CHARSET=UTF-8 idn | | CHARSET=UTF-8 idn | awk '{print(length($0)":."$0)}' \
awk '{print(length($0)":."$0)}' | sort -nr | cut -d: -f2 | sort -nr | cut -d: -f2 > "$blocklist.aux" \
) && mv -f -- "$blocklist.aux" "$blocklist.suffixes"
# Create regex pattern for each suffix # Create regex pattern for each suffix
suffixesRegex=$(printf -- '%s' "$suffixes" | sed 's/\./\\./g;s/$/$/g') sed -e 's/\./\\./g;s/$/$/g' \
-- "$blocklist.suffixes" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist.suffixes"
# Count blocklist matches for each suffix # Count blocklist matches for each suffix
for regex in $suffixesRegex; do while read -r regex; do
match=$(printf -- '%s' "$blocklist" | grep -- "$regex") || true if grep -- "$regex" "$blocklist" > "$blocklist.match"; then
count=$(awk '{s+=$1}END{print(s)}' "$blocklist.match")
if [ -n "$match" ]; then
count=$(printf -- '%s' "$match" | awk '{s+=$1}END{print(s)}')
stats=$(printf -- '%s\t%s\n%s' "$count" "$regex" "$stats") stats=$(printf -- '%s\t%s\n%s' "$count" "$regex" "$stats")
blocklist=$(printf -- '%s' "$blocklist" | grep -v -- "$regex") || true (grep -v -- "$regex" "$blocklist" > "$blocklist.aux" \
&& mv -f -- "$blocklist.aux" "$blocklist") || true
fi fi
done done < "$blocklist.suffixes"
# Undo regex pattern # Undo regex pattern
stats=$(printf -- '%s' "$stats" | sed 's/\\\././g;s/\$$//g') stats=$(printf -- '%s' "$stats" | sed 's/\\\././g;s/\$$//g')
fi fi
# If blocklist is not empty use TLD as suffix # If blocklist is not empty use TLD as suffix
if [ -n "$blocklist" ]; then if [ -s "$blocklist" ]; then
tldStats=$(printf -- '%s' "$blocklist" | tldStats=$(sed -e 's/^\(.\{1,\}[[:blank:]]\).*\(\.[^.]\{1,\}\)$/\1\2/g' -- "$blocklist" |
sed 's/^\(.\{1,\}[[:blank:]]\).*\(\.[^.]\{1,\}\)$/\1\2/g' |
awk '{arr[$2]+=$1;}END{for (i in arr) print(arr[i]"\t"i)}' awk '{arr[$2]+=$1;}END{for (i in arr) print(arr[i]"\t"i)}'
) )
@ -69,4 +98,4 @@ main() {
printf -- '%s\n%s\n' "$header" "$stats" printf -- '%s\n%s\n' "$header" "$stats"
} }
main "$@" main "${@-}"

View file

@ -7,16 +7,20 @@
set -eu set -eu
export LC_ALL=C export LC_ALL=C
endsWith() { # Check if a program exists
[ "${1%$2}" != "$1" ] checkCommand() { command -v -- "$1" >/dev/null 2>&1; }
}
# Check whether a string ends with the characters of a specified string
endsWith() { str=$1 && substr=$2 && [ "${str%$substr}" != "$str" ]; }
# Escape string for use in HTML
escapeHTML() { escapeHTML() {
printf -- '%s' "$1" | \ printf -- '%s' "$1" | \
sed 's|&|\&#38;|g;s|<|\&#60;|g;s|>|\&#62;|g;s|"|\&#34;|g;s|'\''|\&#39;|g' | \ sed -e 's|&|\&#38;|g;s|<|\&#60;|g;s|>|\&#62;|g;s|"|\&#34;|g;s|'\''|\&#39;|g' | \
sed -e ':a' -e 'N' -e '$!ba' -e 's/\n/\&#10;/g' sed -e ':a' -e 'N' -e '$!ba' -e 's/\n/\&#10;/g'
} }
# RFC 3986 compliant URL encoding method
encodeURI() { encodeURI() {
_LC_COLLATE=${LC_COLLATE-}; LC_COLLATE=C; _IFS=$IFS; IFS=: _LC_COLLATE=${LC_COLLATE-}; LC_COLLATE=C; _IFS=$IFS; IFS=:
hex=$(printf -- '%s' "$1" | hexdump -ve '/1 ":%02X"'); hex=${hex#:} hex=$(printf -- '%s' "$1" | hexdump -ve '/1 ":%02X"'); hex=${hex#:}
@ -58,11 +62,11 @@ main() {
escapedFileSize=$fileSize escapedFileSize=$fileSize
fi fi
if command -v file >/dev/null; then if checkCommand file; then
fileType=$(file -bL --mime-type "$file") fileType=$(file -bL --mime-type "$file")
escapedFileType=$(escapeHTML "$fileType") escapedFileType=$(escapeHTML "$fileType")
else else
fileType=$(printf '\x20') fileType=$(printf 'application/octet-stream')
escapedFileType=$fileType escapedFileType=$fileType
fi fi
@ -249,4 +253,4 @@ main() {
)" )"
} }
main "$@" main "${@-}"

View file

@ -6,7 +6,8 @@ scriptDir=$(dirname "$(readlink -f "$0")")
baseDir="$scriptDir/.." baseDir="$scriptDir/.."
action="${1:-nothing}" action="${1:-nothing}"
# Escape strings in sed: https://stackoverflow.com/a/29613573 # Escape strings in sed
# See: https://stackoverflow.com/a/29613573
quoteRe() { printf -- '%s' "$1" | sed -e 's/[^^]/[&]/g; s/\^/\\^/g; $!a'\\''"$(printf '\n')"'\\n' | tr -d '\n'; } quoteRe() { printf -- '%s' "$1" | sed -e 's/[^^]/[&]/g; s/\^/\\^/g; $!a'\\''"$(printf '\n')"'\\n' | tr -d '\n'; }
quoteSubst() { printf -- '%s' "$1" | sed -e ':a' -e '$!{N;ba' -e '}' -e 's/[&/\]/\\&/g; s/\n/\\&/g'; } quoteSubst() { printf -- '%s' "$1" | sed -e ':a' -e '$!{N;ba' -e '}' -e 's/[&/\]/\\&/g; s/\n/\\&/g'; }
replaceLiteral() { sed -i -- "s/$(quoteRe "$1")/$(quoteSubst "$2")/g" "$3"; } replaceLiteral() { sed -i -- "s/$(quoteRe "$1")/$(quoteSubst "$2")/g" "$3"; }