Move urlencode/urldecode functions to core lib
This commit is contained in:
parent
88f42b6132
commit
14b4ba83c3
@ -73,3 +73,137 @@ function env_default() {
|
|||||||
env | grep -q "^$1=" && return 0
|
env | grep -q "^$1=" && return 0
|
||||||
export "$1=$2" && return 3
|
export "$1=$2" && return 3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Required for $langinfo
|
||||||
|
zmodload zsh/langinfo
|
||||||
|
|
||||||
|
# URL-encode a string
|
||||||
|
#
|
||||||
|
# Encodes a string using RFC 2396 URL-encoding (%-escaped).
|
||||||
|
# See: https://www.ietf.org/rfc/rfc2396.txt
|
||||||
|
#
|
||||||
|
# By default, reserved characters and unreserved "mark" characters are
|
||||||
|
# not escaped by this function. This allows the common usage of passing
|
||||||
|
# an entire URL in, and encoding just special characters in it, with
|
||||||
|
# the expectation that reserved and mark characters are used appropriately.
|
||||||
|
# The -r and -m options turn on escaping of the reserved and mark characters,
|
||||||
|
# respectively, which allows arbitrary strings to be fully escaped for
|
||||||
|
# embedding inside URLs, where reserved characters might be misinterpreted.
|
||||||
|
#
|
||||||
|
# Prints the encoded string on stdout.
|
||||||
|
# Returns nonzero if encoding failed.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# omz_urlencode [-r] [-m] [-P] <string>
|
||||||
|
#
|
||||||
|
# -r causes reserved characters (;/?:@&=+$,) to be escaped
|
||||||
|
#
|
||||||
|
# -m causes "mark" characters (_.!~*''()-) to be escaped
|
||||||
|
#
|
||||||
|
# -P causes spaces to be encoded as '%20' instead of '+'
|
||||||
|
function omz_urlencode() {
|
||||||
|
emulate -L zsh
|
||||||
|
zparseopts -D -E -a opts r m P
|
||||||
|
|
||||||
|
local in_str=$1
|
||||||
|
local url_str=""
|
||||||
|
local spaces_as_plus
|
||||||
|
if [[ -z $opts[(r)-P] ]]; then spaces_as_plus=1; fi
|
||||||
|
local str="$in_str"
|
||||||
|
|
||||||
|
# URLs must use UTF-8 encoding; convert str to UTF-8 if required
|
||||||
|
local encoding=$langinfo[CODESET]
|
||||||
|
local safe_encodings
|
||||||
|
safe_encodings=(UTF-8 utf8 US-ASCII)
|
||||||
|
if [[ -z ${safe_encodings[(r)$encoding]} ]]; then
|
||||||
|
str=$(echo -E "$str" | iconv -f $encoding -t UTF-8)
|
||||||
|
if [[ $? != 0 ]]; then
|
||||||
|
echo "Error converting string from $encoding to UTF-8" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Use LC_CTYPE=C to process text byte-by-byte
|
||||||
|
local i byte ord LC_ALL=C
|
||||||
|
export LC_ALL
|
||||||
|
local reserved=';/?:@&=+$,'
|
||||||
|
local mark='_.!~*''()-'
|
||||||
|
local dont_escape="[A-Za-z0-9"
|
||||||
|
if [[ -z $opts[(r)-r] ]]; then
|
||||||
|
dont_escape+=$reserved
|
||||||
|
fi
|
||||||
|
# $mark must be last because of the "-"
|
||||||
|
if [[ -z $opts[(r)-m] ]]; then
|
||||||
|
dont_escape+=$mark
|
||||||
|
fi
|
||||||
|
dont_escape+="]"
|
||||||
|
|
||||||
|
# Implemented to use a single printf call and avoid subshells in the loop,
|
||||||
|
# for performance (primarily on Windows).
|
||||||
|
local url_str=""
|
||||||
|
for (( i = 1; i <= ${#str}; ++i )); do
|
||||||
|
byte="$str[i]"
|
||||||
|
if [[ "$byte" =~ "$dont_escape" ]]; then
|
||||||
|
url_str+="$byte"
|
||||||
|
else
|
||||||
|
if [[ "$byte" == " " && -n $spaces_as_plus ]]; then
|
||||||
|
url_str+="+"
|
||||||
|
else
|
||||||
|
ord=$(( [##16] #byte ))
|
||||||
|
url_str+="%$ord"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
echo -E "$url_str"
|
||||||
|
}
|
||||||
|
|
||||||
|
# URL-decode a string
|
||||||
|
#
|
||||||
|
# Decodes a RFC 2396 URL-encoded (%-escaped) string.
|
||||||
|
# This decodes the '+' and '%' escapes in the input string, and leaves
|
||||||
|
# other characters unchanged. Does not enforce that the input is a
|
||||||
|
# valid URL-encoded string. This is a convenience to allow callers to
|
||||||
|
# pass in a full URL or similar strings and decode them for human
|
||||||
|
# presentation.
|
||||||
|
#
|
||||||
|
# Outputs the decoded string on stdout.
|
||||||
|
# Returns nonzero if decoding failed.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# omz_urldecode <urlstring> - prints decoded string followed by a newline
|
||||||
|
function omz_urldecode {
|
||||||
|
emulate -L zsh
|
||||||
|
local encoded_url=$1
|
||||||
|
|
||||||
|
echo -e input $1
|
||||||
|
# Work bytewise, since URLs escape UTF-8 octets
|
||||||
|
local caller_encoding=$langinfo[CODESET]
|
||||||
|
local LC_ALL=C
|
||||||
|
export LC_ALL
|
||||||
|
|
||||||
|
# Change + back to ' '
|
||||||
|
local tmp=${encoded_url:gs/+/ /}
|
||||||
|
# Protect other escapes to pass through the printf unchanged
|
||||||
|
tmp=${tmp:gs/\\/\\\\/}
|
||||||
|
# Handle %-escapes by turning them into `\xXX` printf escapes
|
||||||
|
tmp=${tmp:gs/%/\\x/}
|
||||||
|
echo -E "before decode $tmp"
|
||||||
|
local decoded
|
||||||
|
eval "decoded=\$'$tmp'"
|
||||||
|
|
||||||
|
# Now we have a UTF-8 encoded string in the variable. We need to re-encode
|
||||||
|
# it if caller is in a non-UTF-8 locale.
|
||||||
|
local safe_encodings
|
||||||
|
safe_encodings=(UTF-8 utf8 US-ASCII)
|
||||||
|
if [[ -z ${safe_encodings[(r)$caller_encoding]} ]]; then
|
||||||
|
decoded=$(echo -E "$decoded" | iconv -f UTF-8 -t $caller_encoding)
|
||||||
|
if [[ $? != 0 ]]; then
|
||||||
|
echo "Error converting string from UTF-8 to $caller_encoding" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo -E "$decoded"
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -59,44 +59,13 @@ preexec_functions+=(omz_termsupport_preexec)
|
|||||||
|
|
||||||
if [[ "$TERM_PROGRAM" == "Apple_Terminal" ]] && [[ -z "$INSIDE_EMACS" ]]; then
|
if [[ "$TERM_PROGRAM" == "Apple_Terminal" ]] && [[ -z "$INSIDE_EMACS" ]]; then
|
||||||
|
|
||||||
# URL-encodes a string
|
|
||||||
# Outputs the encoded string on stdout
|
|
||||||
# Returns nonzero if encoding failed
|
|
||||||
function _omz_urlencode() {
|
|
||||||
local str=$1
|
|
||||||
local url_str=""
|
|
||||||
|
|
||||||
# URLs must use UTF-8 encoding; convert if required
|
|
||||||
local encoding=${LC_CTYPE/*./}
|
|
||||||
if [[ -n $encoding && $encoding != UTF-8 && $encoding != utf8 ]]; then
|
|
||||||
str=$(echo $str | iconv -f $encoding -t UTF-8)
|
|
||||||
if [[ $? != 0 ]]; then
|
|
||||||
echo "Error converting string from $encoding to UTF-8" >&2
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Use LC_CTYPE=C to process text byte-by-byte
|
|
||||||
local i ch hexch LC_CTYPE=C
|
|
||||||
for ((i = 1; i <= ${#str}; ++i)); do
|
|
||||||
ch="$str[i]"
|
|
||||||
if [[ "$ch" =~ [/._~A-Za-z0-9-] ]]; then
|
|
||||||
url_str+="$ch"
|
|
||||||
else
|
|
||||||
hexch=$(printf "%02X" "'$ch")
|
|
||||||
url_str+="%$hexch"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
echo $url_str
|
|
||||||
}
|
|
||||||
|
|
||||||
# Emits the control sequence to notify Terminal.app of the cwd
|
# Emits the control sequence to notify Terminal.app of the cwd
|
||||||
function update_terminalapp_cwd() {
|
function update_terminalapp_cwd() {
|
||||||
# Identify the directory using a "file:" scheme URL, including
|
# Identify the directory using a "file:" scheme URL, including
|
||||||
# the host name to disambiguate local vs. remote paths.
|
# the host name to disambiguate local vs. remote paths.
|
||||||
|
|
||||||
# Percent-encode the pathname.
|
# Percent-encode the pathname.
|
||||||
local URL_PATH=$(_omz_urlencode $PWD)
|
local URL_PATH=$(omz_urlencode -P $PWD)
|
||||||
[[ $? != 0 ]] && return 1
|
[[ $? != 0 ]] && return 1
|
||||||
local PWD_URL="file://$HOST$URL_PATH"
|
local PWD_URL="file://$HOST$URL_PATH"
|
||||||
# Undocumented Terminal.app-specific control sequence
|
# Undocumented Terminal.app-specific control sequence
|
||||||
|
Loading…
Reference in New Issue
Block a user