我正在编写一个用于测试的bash脚本,它接收一个参数,并通过curl将其发送到网站。我需要对这个值进行URL编码,以确保特殊字符被正确处理。最好的方法是什么?
这是我到目前为止的基本脚本:
#!/bin/bash
# Usage: SCRIPT HOST VALUE [curl options...]
# POSTs VALUE as the form field "param" to http://HOST/somepath.
# Any arguments after VALUE are handed to curl unchanged.
# Note: -d sends VALUE exactly as given; it is not URL-encoded here.
host=${1:?'bad host'}
value=$2
# Two single shifts on purpose: "shift 2" would fail and drop nothing
# when only HOST was supplied, leaving HOST in "$@".
shift
shift
# Quote the URL and "$@" so arguments with spaces or glob characters
# reach curl as single, unmodified words.
curl -v -d "param=${value}" "http://${host}/somepath" "$@"
我正在编写一个用于测试的bash脚本,它接收一个参数,并通过curl将其发送到网站。我需要对这个值进行URL编码,以确保特殊字符被正确处理。最好的方法是什么?
这是我到目前为止的基本脚本:
#!/bin/bash
# Usage: SCRIPT HOST VALUE [curl options...]
# POSTs VALUE as the form field "param" to http://HOST/somepath.
# Any arguments after VALUE are handed to curl unchanged.
# Note: -d sends VALUE exactly as given; it is not URL-encoded here.
host=${1:?'bad host'}
value=$2
# Two single shifts on purpose: "shift 2" would fail and drop nothing
# when only HOST was supplied, leaving HOST in "$@".
shift
shift
# Quote the URL and "$@" so arguments with spaces or glob characters
# reach curl as single, unmodified words.
curl -v -d "param=${value}" "http://${host}/somepath" "$@"
当前回答
Note
这些函数不是用来编码URL的数据,而是用来编码URL。 将url以每行一个的方式放入文件中。
#!/bin/dash
# replaceUnicodes FILE
# Rewrites FILE in place, one input line at a time. Within each line the
# awk program replaces:
#   - \uXXXX escapes (a high surrogate followed directly by a low surrogate
#     is combined into a single code point),
#   - \UXXXXXXXX escapes,
#   - hexadecimal (&#x...;) and decimal (&#...;) numeric character references
# with the percent-encoded UTF-8 bytes of the code point.
# Returns 1 when FILE cannot be moved to FILE.tmp.
# NOTE(review): and(), or(), lshift() and rshift() are not POSIX awk
# functions - presumably this requires gawk; confirm.
# NOTE(review): output is only written from the per-line rule, so an empty
# FILE appears not to be recreated after the move - verify.
replaceUnicodes () { # $1=input/output file
# Move the input aside; awk reads the .tmp copy and writes to the original path.
if ! mv -f "$1" "$1".tmp 2>/dev/null; then return 1; fi
# The destination path travels through the environment (ENVIRON["output"]).
output="$1" awk '
function hexValue(chr) {
if(chr=="0") return 0; if(chr=="1") return 1; if(chr=="2") return 2; if(chr=="3") return 3; if(chr=="4") return 4; if(chr=="5") return 5;
if(chr=="6") return 6; if(chr=="7") return 7; if(chr=="8") return 8; if(chr=="9") return 9; if(chr=="A") return 10;
if(chr=="B") return 11; if(chr=="C") return 12; if(chr=="D") return 13; if(chr=="E") return 14; return 15 }
function hexToDecimal(str, value,i,inc) {
str=toupper(str); value=and(hexValue(substr(str,length(str),1)),15); inc=1;
for(i=length(str)-1;i>0;i--) {
value+=lshift(hexValue(substr(str,i,1)),4*inc++)
} return value }
function toDecimal(str, value,i) {
for(i=1;i<=length(str);i++) {
value=(value*10)+substr(str,i,1)
} return value }
function to32BE(high,low) {
# return 0x10000+((high-0xD800)*0x400)+(low-0xDC00) }
return lshift((high-0xD800),10)+(low-0xDC00)+0x10000 }
function toUTF8(value) {
if(value<0x80) {
return sprintf("%%%02X",value)
} else if(value>0xFFFF) {
return sprintf("%%%02X%%%02X%%%02X%%%02X",or(0xF0,and(rshift(value,18),0x07)),or(0x80,and(rshift(value,12),0x3F)),or(0x80,and(rshift(value,6),0x3F)),or(0x80,and(rshift(value,0),0x3F)))
} else if(value>0x07FF) {
return sprintf("%%%02X%%%02X%%%02X",or(0xE0,and(rshift(value,12),0x0F)),or(0x80,and(rshift(value,6),0x3F)),or(0x80,and(rshift(value,0),0x3F)))
} else { return sprintf("%%%02X%%%02X",or(0xC0,and(rshift(value,6),0x1F)),or(0x80,and(rshift(value,0),0x3F))) }
}
function trap(str) { sub(/^\\+/,"\\",str); return str }
function esc(str) { gsub(/\\/,"\\\\",str); return str }
BEGIN { output=ENVIRON["output"] }
{
finalStr=""; while(match($0,/[\\]+u[0-9a-fA-F]{4}/)) {
p=substr($0,RSTART,RLENGTH); num=hexToDecimal(substr(p,RLENGTH-3,4));
bfrStr=substr($0,1,RSTART-1); $0=substr($0,RSTART+RLENGTH,length($0)-(RSTART+RLENGTH-1));
if(surrogate) {
surrogate=0;
if(RSTART!=1 || num<0xD800 || (num>0xDBFF && num<0xDC00) || num>0xDFFF) {
finalStr=sprintf("%s%s%s%s",finalStr,trap(highP),bfrStr,toUTF8(num))
} else if(num>0xD7FF && num<0xDC00) {
surrogate=1; high=num; finalStr=sprintf("%s%s",finalStr,trap(highP))
} else { finalStr=sprintf("%s%s",finalStr,toUTF8(to32BE(high,num))) }
} else if(num>0xD7FF && num<0xDC00) {
surrogate=1; highP=p; high=num; finalStr=sprintf("%s%s",finalStr,bfrStr)
} else { finalStr=sprintf("%s%s%s",finalStr,bfrStr,toUTF8(num)) }
} finalStr=sprintf("%s%s",finalStr,$0); $0=finalStr
while(match($0,/[\\]+U[0-9a-fA-F]{8}/)) {
str=substr($0,RSTART,RLENGTH); gsub(esc(str),toUTF8(hexToDecimal(substr(str,RLENGTH-7,8))),$0)
}
while(match($0,/[\\]*&#[xX][0-9a-fA-F]{1,8};/)) {
str=substr($0,RSTART,RLENGTH); idx=index(str,"#");
gsub(esc(str),toUTF8(hexToDecimal(substr(str,idx+2,RLENGTH-idx-2))),$0)
}
while(match($0,/[\\]*&#[0-9]{1,10};/)) {
str=substr($0,RSTART,RLENGTH); idx=index(str,"#");
gsub(esc(str),toUTF8(toDecimal(substr(str,idx+1,RLENGTH-idx-1))),$0)
}
printf("%s\n",$0) > output
}' "$1".tmp
# The temporary copy is no longer needed once awk has finished.
rm -f "$1".tmp
}
# replaceHtmlEntities FILE
# Rewrites FILE in place (via FILE.tmp):
#   - %3A and %2F are turned back into ":" and "/",
#   - the named HTML entities &quot; &lt; &gt; &nbsp; &cent; &pound; &yen;
#     &copy; &reg; become percent escapes,
#   - &amp; becomes a plain "&" (done last so "&amp;lt;" is not decoded twice),
#   - backslashes directly in front of a "/" are removed.
# Returns 1 when FILE cannot be moved to FILE.tmp.
# The entity names are spelled out explicitly: matching the already-decoded
# characters would make the "&" rule a no-op and would mangle ordinary
# quotes, angle brackets and spaces instead of entities.
# NOTE(review): %A0..%AE are single-byte (Latin-1 style) escapes, kept as in
# the original substitution list - confirm this is what the consumer expects.
replaceHtmlEntities () { # $1=input/output file
  if ! mv -f "$1" "$1".tmp 2>/dev/null; then return 1; fi
  sed -e 's/%3[aA]/:/g' \
      -e 's/%2[fF]/\//g' \
      -e 's/&quot;/%22/g' \
      -e 's/&lt;/%3C/g' \
      -e 's/&gt;/%3E/g' \
      -e 's/&nbsp;/%A0/g' \
      -e 's/&cent;/%A2/g' \
      -e 's/&pound;/%A3/g' \
      -e 's/&yen;/%A5/g' \
      -e 's/&copy;/%A9/g' \
      -e 's/&reg;/%AE/g' \
      -e 's/&amp;/\&/g' \
      -e 's/\\*\//\//g' \
      "$1".tmp > "$1"
  rm -f "$1".tmp
}
# "od -v -A n -t u1 -w99999999"
# "hexdump -v -e \47/1 \42%d \42\47"
# Reminder :: Do not encode (, ), [, and ].
# toUTF8Encoded FILE
# Rewrites FILE in place (via FILE.tmp), percent-encoding it byte by byte.
# One output line is produced per input line; CR, LF and CR LF all end a line.
#   - "%" followed by two hex digits is copied through unchanged,
#   - any other "%" becomes %25,
#   - printable ASCII (33..126) except " ' ` < > is copied through,
#   - every other byte becomes %XX (upper-case hex).
# Returns 1 when FILE cannot be moved to FILE.tmp.
# An empty FILE is left as an empty file rather than being removed.
toUTF8Encoded () { # $1=input/output file
  if ! mv -f "$1" "$1".tmp 2>/dev/null; then return 1; fi
  if [ -s "$1".tmp ]; then
    # od is POSIX (hexdump is not); tr joins its rows into the single
    # record of decimal byte values that the awk program expects.
    od -v -A n -t u1 "$1".tmp | tr '\n' ' ' |
      output="$1" awk '
        function hexDigit(b) {
          return (b>47 && b<58) || (b>64 && b<71) || (b>96 && b<103)
        }
        BEGIN { output=ENVIRON["output"] }
        {
          for(i=1;i<=NF;i++) {
            # Force numbers so that %c always means "character with this code".
            flushed=0; c=$(i)+0; n1=$(i+1)+0; n2=$(i+2)+0
            if(c==13) {
              # CR, optionally followed by LF, ends the current line.
              if(n1==10) i++
              printf("%s\n",url) > output; url=""; flushed=1
            } else if(c==10) {
              printf("%s\n",url) > output; url=""; flushed=1
            } else if(c==37) {
              if(hexDigit(n1) && hexDigit(n2)) {
                # Already an escape sequence: keep it as is.
                url=sprintf("%s%%%c%c",url,n1,n2); i+=2
              } else { url=sprintf("%s%%25",url) }
            } else if(c>32 && c<127 && c!=34 && c!=39 && c!=96 && c!=60 && c!=62) {
              url=sprintf("%s%c",url,c)
            } else { url=sprintf("%s%%%02X",url,c) }
          }
          # Input that does not end with a line terminator still gets flushed.
          if(!flushed) printf("%s\n",url) > output
        }'
  else
    # Nothing to encode: recreate the (empty) file that was moved away.
    : > "$1"
  fi
  rm -f "$1".tmp
}
调用顺序:replaceUnicodes() -> replaceHtmlEntities() -> toUTF8Encoded()
其他回答
问题是关于在bash中这样做,不需要python或perl,因为实际上有一个命令,它完全是你想要的-“urlencode”。
value=$(urlencode "${2}")
这也更好,因为上面的perl答案,例如,没有正确编码所有字符。尝试使用从Word中获得的长破折号,你会得到错误的编码。
注意,你需要安装"gridsite-clients"来提供这个命令:
sudo apt install gridsite-clients
下面是我的嵌入式系统busybox ash shell版本,我最初采用了Orwellophile的变体:
urlencode()
{
    # urlencode <string>
    # Prints <string> with every byte except A-Z a-z 0-9 - _ . ~ replaced
    # by %xx (lower-case hex), followed by a newline.
    local S="${1}"
    local encoded=""
    local ch
    local o
    local i=0
    # Work byte-wise: with a multibyte locale the shell would hand out whole
    # characters (giving a code point instead of UTF-8 bytes) and the a-z
    # style ranges below could match accented letters.
    local LC_ALL=C

    while [ "$i" -lt "${#S}" ]
    do
        ch=${S:$i:1}
        case "${ch}" in
            [-_.~a-zA-Z0-9])
                o="${ch}"
                ;;
            *)
                # A leading quote makes printf use the numeric value of ch.
                o=$(printf '%%%02x' "'$ch")
                ;;
        esac
        encoded="${encoded}${o}"
        i=$((i + 1))
    done
    # printf instead of echo: results such as "-n" or "-e" must be printed,
    # not taken as echo options.
    printf '%s\n' "${encoded}"
}
urldecode()
{
    # urldecode <string>
    # Prints <string> with "+" turned into a space and each %XX turned into
    # the byte with that hex value. No trailing newline is added.
    local url_encoded="${1//+/ }"
    # Double every literal backslash first, so that input such as "\t" is
    # printed as-is instead of being expanded by printf %b.
    url_encoded="${url_encoded//\\/\\\\}"
    printf '%b' "${url_encoded//%/\\x}"
}
这个基于nodejs的答案将在stdin上使用encodeURIComponent:
# uriencode_stdin
# Runs node with -p (evaluate and print): the expression reads file
# descriptor 0 in full and passes the result to encodeURIComponent.
# Takes no arguments; requires node on PATH.
uriencode_stdin() {
node -p 'encodeURIComponent(require("fs").readFileSync(0))'
}
echo -n $'hello\nwörld' | uriencode_stdin
hello%0Aw%C3%B6rld
awk版本的直接链接:http://www.shelldorado.com/scripts/cmds/urlencode 我用了很多年了,效果很好
:
##########################################################################
# Title : urlencode - encode URL data
# Author : Heiner Steven (heiner.steven@odn.de)
# Date : 2000-03-15
# Requires : awk
# Categories : File Conversion, WWW, CGI
# SCCS-Id. : @(#) urlencode 1.4 06/10/29
##########################################################################
# Description
# Encode data according to
# RFC 1738: "Uniform Resource Locators (URL)" and
# RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
#
# This encoding is used i.e. for the MIME type
# "application/x-www-form-urlencoded"
#
# Notes
# o The default behaviour is not to encode the line endings. This
# may not be what was intended, because the result will be
# multiple lines of output (which cannot be used in an URL or a
# HTTP "POST" request). If the desired output should be one
# line, use the "-l" option.
#
# o The "-l" option assumes, that the end-of-line is denoted by
# the character LF (ASCII 10). This is not true for Windows or
# Mac systems, where the end of a line is denoted by the two
# characters CR LF (ASCII 13 10).
# We use this for symmetry; data processed in the following way:
# cat | urlencode -l | urldecode -l
# should (and will) result in the original data
#
# o Large lines (or binary files) will break many AWK
# implementations. If you get the message
# awk: record `...' too long
# record number xxx
# consider using GNU AWK (gawk).
#
# o urlencode will always terminate its output with an EOL
# character
#
# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
# locales.
#
# See also
# urldecode
##########################################################################
# Program name (prefix for messages) and script version.
PN=$(basename "$0")
VER=1.4
# The awk implementation can be overridden through the AWK environment variable.
: "${AWK=awk}"
# Usage
# Prints the help text (program name, version, options) on stderr and
# terminates the script with status 1.
Usage () {
    printf '%s\n' \
        "$PN - encode URL data, $VER" \
        "usage: $PN [-l] [file ...]" \
        "-l: encode line endings (result will be one line of output)" \
        "The default is to encode each input line on its own." >&2
    exit 1
}
# Msg LINE...
# Writes every argument to stderr on its own line, prefixed with "$PN: ".
Msg () {
    for MsgLine in "$@"; do
        echo "$PN: $MsgLine"
    done >&2
}
# Fatal LINE...
# Reports the given lines through Msg, then terminates the script with status 1.
Fatal () {
    Msg "$@"
    exit 1
}
# Option parsing.
#   -l  encode line endings (sets EncodeEOL=yes)
#   -h  or any unknown option: show usage and exit
# The getopts builtin is used instead of the external getopt inside a command
# substitution, which re-splits the arguments and so breaks file names that
# contain whitespace. Options must precede the file names.
EncodeEOL=no
while getopts hl Opt 2>/dev/null
do
    case "$Opt" in
        l)  EncodeEOL=yes;;
        *)  Usage;;                     # -h or an invalid option
    esac
done
shift $((OPTIND - 1))                   # leave only the file names in "$@"

# Force byte-wise character handling in awk. LC_ALL is set as well because
# an inherited LC_ALL (or LC_CTYPE) takes precedence over LANG.
LANG=C; export LANG
LC_ALL=C; export LC_ALL
# Run the encoder: $AWK processes the files named in "$@" (awk reads standard
# input when no file operand is given).
# For every input line: letters, digits, "." and "-" are copied, a space
# becomes "+", and any other character becomes "%" plus two hex digits taken
# from the ord[]/hextab[] tables built in BEGIN.
# The shell value of $EncodeEOL is spliced into the program text by closing
# and reopening the single-quoted string; when it is "yes", each encoded line
# is followed by "%0A" and no newline is written, otherwise each encoded line
# is printed on its own line.
$AWK '
BEGIN {
# We assume an awk implementation that is just plain dumb.
# We will convert an character to its ASCII value with the
# table ord[], and produce two-digit hexadecimal output
# without the printf("%02X") feature.
EOL = "%0A" # "end of line" string (encoded)
split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
hextab [0] = 0
for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
}
{
encoded = ""
for ( i=1; i<=length ($0); ++i ) {
c = substr ($0, i, 1)
if ( c ~ /[a-zA-Z0-9.-]/ ) {
encoded = encoded c # safe character
} else if ( c == " " ) {
encoded = encoded "+" # special handling
} else {
# unsafe character, encode it as a two-digit hex-number
lo = ord [c] % 16
hi = int (ord [c] / 16);
encoded = encoded "%" hextab [hi] hextab [lo]
}
}
if ( EncodeEOL ) {
printf ("%s", encoded EOL)
} else {
print encoded
}
}
END {
#if ( EncodeEOL ) print ""
}
' "$@"
其中一种变体可能很丑,但很简单:
# urlencode STRING
# Prints STRING URL-encoded by curl, followed by a newline.
# curl is pointed at an empty URL, so nothing is transferred; it only builds
# the effective URL "/?<encoded>" and then fails with exit status 3
# (malformed URL), which is the expected outcome here.
# Returns: 0 on success, 1 on wrong usage, 2 if curl behaved unexpectedly.
urlencode() {
    local data rc

    if [[ $# != 1 ]]; then
        # Diagnostics go to stderr so they are never captured as a result.
        echo "Usage: $0 string-to-urlencode" >&2
        return 1
    fi

    # The leading "=" selects curl's "=content" form: the whole argument is
    # encoded literally. Without it, a value containing "@" would be read as
    # a file name and text before the first "=" would be taken as a field
    # name and left unencoded.
    data="$(curl -s -o /dev/null -w '%{url_effective}' --get --data-urlencode "=$1" "")"
    rc=$?
    if [[ $rc != 3 ]]; then
        echo "Unexpected error" 1>&2
        return 2
    fi

    # Strip the leading "/?"; printf keeps results such as "-n" intact.
    printf '%s\n' "${data##/?}"
    return 0
}
下面是一个单行版本的例子(由Bruno建议):
date | curl -Gso /dev/null -w %{url_effective} --data-urlencode @- "" | cut -c 3-
# If you experience the trailing %0A, use
date | curl -Gso /dev/null -w %{url_effective} --data-urlencode @- "" | sed -E 's/..(.*).../\1/'