如何为curl命令urlencode数据?

我试图写一个bash脚本测试，需要一个参数，并通过curl发送到网站。我需要url编码的值，以确保特殊字符被正确处理。最好的方法是什么?

这是我到目前为止的基本脚本:

#!/bin/bash
host=${1:?'bad host'}
value=$2
shift
shift
curl -v -d "param=${value}" http://${host}/somepath $@

当前回答

下面是一个POSIX函数:

url_encode() {
   awk 'BEGIN {
      for (n = 0; n < 125; n++) {
         m[sprintf("%c", n)] = n
      }
      n = 1
      while (1) {
         s = substr(ARGV[1], n, 1)
         if (s == "") {
            break
         }
         t = s ~ /[[:alnum:]_.!~*\47()-]/ ? t s : t sprintf("%%%02X", m[s])
         n++
      }
      print t
   }' "$1"
}

例子:

value=$(url_encode "$2")

2016-12-31 05:14:25

其他回答

url=$(echo "$1" | sed -e 's/%/%25/g' -e 's/ /%20/g' -e 's/!/%21/g' -e 's/"/%22/g' -e 's/#/%23/g' -e 's/\$/%24/g' -e 's/\&/%26/g' -e 's/'\''/%27/g' -e 's/(/%28/g' -e 's/)/%29/g' -e 's/\*/%2a/g' -e 's/+/%2b/g' -e 's/,/%2c/g' -e 's/-/%2d/g' -e 's/\./%2e/g' -e 's/\//%2f/g' -e 's/:/%3a/g' -e 's/;/%3b/g' -e 's//%3e/g' -e 's/?/%3f/g' -e 's/@/%40/g' -e 's/\[/%5b/g' -e 's/\\/%5c/g' -e 's/\]/%5d/g' -e 's/\^/%5e/g' -e 's/_/%5f/g' -e 's/`/%60/g' -e 's/{/%7b/g' -e 's/|/%7c/g' -e 's/}/%7d/g' -e 's/~/%7e/g')

这将对$1中的字符串进行编码，并将其输出到$url中。尽管你不需要把它放在var中。顺便说一句，没有包括sed for选项卡，认为它会把它变成空格

2011-01-11 12:51:29

你可以在perl中模拟javascript的encodeURIComponent。下面是命令:

perl -pe 's/([^a-zA-Z0-9_.!~*()'\''-])/sprintf("%%%02X", ord($1))/ge'

你可以在.bash_profile中设置它为bash别名:

alias encodeURIComponent='perl -pe '\''s/([^a-zA-Z0-9_.!~*()'\''\'\'''\''-])/sprintf("%%%02X",ord($1))/ge'\'

现在你可以管道到encodeURIComponent:

$ echo -n 'hèllo wôrld!' | encodeURIComponent
h%C3%A8llo%20w%C3%B4rld!

2015-01-20 21:08:51

awk版本的直接链接:http://www.shelldorado.com/scripts/cmds/urlencode 我用了很多年了，效果很好

:
##########################################################################
# Title      :  urlencode - encode URL data
# Author     :  Heiner Steven (heiner.steven@odn.de)
# Date       :  2000-03-15
# Requires   :  awk
# Categories :  File Conversion, WWW, CGI
# SCCS-Id.   :  @(#) urlencode  1.4 06/10/29
##########################################################################
# Description
#   Encode data according to
#       RFC 1738: "Uniform Resource Locators (URL)" and
#       RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
#
#   This encoding is used i.e. for the MIME type
#   "application/x-www-form-urlencoded"
#
# Notes
#    o  The default behaviour is not to encode the line endings. This
#   may not be what was intended, because the result will be
#   multiple lines of output (which cannot be used in an URL or a
#   HTTP "POST" request). If the desired output should be one
#   line, use the "-l" option.
#
#    o  The "-l" option assumes, that the end-of-line is denoted by
#   the character LF (ASCII 10). This is not true for Windows or
#   Mac systems, where the end of a line is denoted by the two
#   characters CR LF (ASCII 13 10).
#   We use this for symmetry; data processed in the following way:
#       cat | urlencode -l | urldecode -l
#   should (and will) result in the original data
#
#    o  Large lines (or binary files) will break many AWK
#       implementations. If you get the message
#       awk: record `...' too long
#        record number xxx
#   consider using GNU AWK (gawk).
#
#    o  urlencode will always terminate it's output with an EOL
#       character
#
# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
# locales.
#
# See also
#   urldecode
##########################################################################

PN=`basename "$0"`          # Program name
VER='1.4'

: ${AWK=awk}

Usage () {
    echo >&2 "$PN - encode URL data, $VER
usage: $PN [-l] [file ...]
    -l:  encode line endings (result will be one line of output)

The default is to encode each input line on its own."
    exit 1
}

Msg () {
    for MsgLine
    do echo "$PN: $MsgLine" >&2
    done
}

Fatal () { Msg "$@"; exit 1; }

set -- `getopt hl "$@" 2>/dev/null` || Usage
[ $# -lt 1 ] && Usage           # "getopt" detected an error

EncodeEOL=no
while [ $# -gt 0 ]
do
    case "$1" in
        -l) EncodeEOL=yes;;
    --) shift; break;;
    -h) Usage;;
    -*) Usage;;
    *)  break;;         # First file name
    esac
    shift
done

LANG=C  export LANG
$AWK '
    BEGIN {
    # We assume an awk implementation that is just plain dumb.
    # We will convert an character to its ASCII value with the
    # table ord[], and produce two-digit hexadecimal output
    # without the printf("%02X") feature.

    EOL = "%0A"     # "end of line" string (encoded)
    split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
    hextab [0] = 0
    for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
    if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
    encoded = ""
    for ( i=1; i<=length ($0); ++i ) {
        c = substr ($0, i, 1)
        if ( c ~ /[a-zA-Z0-9.-]/ ) {
        encoded = encoded c     # safe character
        } else if ( c == " " ) {
        encoded = encoded "+"   # special handling
        } else {
        # unsafe character, encode it as a two-digit hex-number
        lo = ord [c] % 16
        hi = int (ord [c] / 16);
        encoded = encoded "%" hextab [hi] hextab [lo]
        }
    }
    if ( EncodeEOL ) {
        printf ("%s", encoded EOL)
    } else {
        print encoded
    }
    }
    END {
        #if ( EncodeEOL ) print ""
    }
' "$@"

2008-11-30 21:42:37

Note

这些函数不是用来编码URL的数据，而是用来编码URL。将url以每行一个的方式放入文件中。

#!/bin/dash

replaceUnicodes () { # $1=input/output file
    if ! mv -f "$1" "$1".tmp 2>/dev/null; then return 1; fi
    output="$1" awk '
    function hexValue(chr) {
        if(chr=="0") return 0; if(chr=="1") return 1; if(chr=="2") return 2; if(chr=="3") return 3; if(chr=="4") return 4; if(chr=="5") return 5;
        if(chr=="6") return 6; if(chr=="7") return 7; if(chr=="8") return 8; if(chr=="9") return 9; if(chr=="A") return 10;
        if(chr=="B") return 11; if(chr=="C") return 12; if(chr=="D") return 13; if(chr=="E") return 14; return 15 }
    function hexToDecimal(str,  value,i,inc) {
        str=toupper(str); value=and(hexValue(substr(str,length(str),1)),15); inc=1;
        for(i=length(str)-1;i>0;i--) {
            value+=lshift(hexValue(substr(str,i,1)),4*inc++)
        } return value }
    function toDecimal(str, value,i) {
        for(i=1;i<=length(str);i++) {
            value=(value*10)+substr(str,i,1)
        } return value }
    function to32BE(high,low) {
        # return 0x10000+((high-0xD800)*0x400)+(low-0xDC00) }
        return lshift((high-0xD800),10)+(low-0xDC00)+0x10000 }
    function toUTF8(value) {
        if(value<0x80) { 
            return sprintf("%%%02X",value)
        } else if(value>0xFFFF) {
            return sprintf("%%%02X%%%02X%%%02X%%%02X",or(0xF0,and(rshift(value,18),0x07)),or(0x80,and(rshift(value,12),0x3F)),or(0x80,and(rshift(value,6),0x3F)),or(0x80,and(rshift(value,0),0x3F)))
        } else if(value>0x07FF) {
            return sprintf("%%%02X%%%02X%%%02X",or(0xE0,and(rshift(value,12),0x0F)),or(0x80,and(rshift(value,6),0x3F)),or(0x80,and(rshift(value,0),0x3F)))
        } else { return sprintf("%%%02X%%%02X",or(0xC0,and(rshift(value,6),0x1F)),or(0x80,and(rshift(value,0),0x3F))) }
    }
    function trap(str) { sub(/^\\+/,"\\",str); return str }
    function esc(str) { gsub(/\\/,"\\\\",str); return str }
    BEGIN { output=ENVIRON["output"] }
    {
        finalStr=""; while(match($0,/[\\]+u[0-9a-fA-F]{4}/)) {
            p=substr($0,RSTART,RLENGTH); num=hexToDecimal(substr(p,RLENGTH-3,4));
            bfrStr=substr($0,1,RSTART-1); $0=substr($0,RSTART+RLENGTH,length($0)-(RSTART+RLENGTH-1));
            if(surrogate) {
                surrogate=0;
                if(RSTART!=1 || num<0xD800 || (num>0xDBFF && num<0xDC00) || num>0xDFFF) {
                    finalStr=sprintf("%s%s%s%s",finalStr,trap(highP),bfrStr,toUTF8(num))
                } else if(num>0xD7FF && num<0xDC00) {
                    surrogate=1; high=num; finalStr=sprintf("%s%s",finalStr,trap(highP))
                } else { finalStr=sprintf("%s%s",finalStr,toUTF8(to32BE(high,num))) }
            } else if(num>0xD7FF && num<0xDC00) {
                surrogate=1; highP=p; high=num; finalStr=sprintf("%s%s",finalStr,bfrStr)
            } else { finalStr=sprintf("%s%s%s",finalStr,bfrStr,toUTF8(num)) }
        } finalStr=sprintf("%s%s",finalStr,$0); $0=finalStr

        while(match($0,/[\\]+U[0-9a-fA-F]{8}/)) {
            str=substr($0,RSTART,RLENGTH); gsub(esc(str),toUTF8(hexToDecimal(substr(str,RLENGTH-7,8))),$0)
        }
        while(match($0,/[\\]*&#[xX][0-9a-fA-F]{1,8};/)) {
            str=substr($0,RSTART,RLENGTH); idx=index(str,"#");
            gsub(esc(str),toUTF8(hexToDecimal(substr(str,idx+2,RLENGTH-idx-2))),$0)
        }
        while(match($0,/[\\]*&#[0-9]{1,10};/)) {
            str=substr($0,RSTART,RLENGTH); idx=index(str,"#");
            gsub(esc(str),toUTF8(toDecimal(substr(str,idx+1,RLENGTH-idx-1))),$0)
        }
        printf("%s\n",$0) > output
    }' "$1".tmp
    rm -f "$1".tmp
}

replaceHtmlEntities () { # $1=input/output file
    if ! mv -f "$1" "$1".tmp 2>/dev/null; then return 1; fi
    sed 's/%3[aA]/:/g; s/%2[fF]/\//g; s/&quot;/%22/g; s/&lt;/%3C/g; s/&gt;/%3E/g; s/&nbsp;/%A0/g; s/&cent;/%A2/g; s/&pound;/%A3/g; s/&yen;/%A5/g; s/&copy;/%A9/g; s/&reg;/%AE/g; s/&amp;/\&/g; s/\\*\//\//g' "$1".tmp > "$1"
    rm -f "$1".tmp
}


# "od -v -A n -t u1 -w99999999"
# "hexdump -v -e \47/1 \42%d \42\47"
# Reminder :: Do not encode (, ), [, and ].
toUTF8Encoded () { # $1=input/output file
    if ! mv -f "$1" "$1".tmp 2>/dev/null; then return 1; fi
    if [ -s "$1".tmp ]; then
        # od -A n -t u1 -w99999999 "$1".tmp | \
        hexdump -v -e '/1 "%d "' "$1".tmp | \
        output="$1" awk 'function hexDigit(chr) { if((chr>47 && chr<58) || (chr>64 && chr<71) || (chr>96 && chr<103)) return 1; return 0 }
        BEGIN { output=ENVIRON["output"] }
        {   for(i=1;i<=NF;i++) {
                flushed=0; c=$(i);
                if(c==13) { if($(i+1)==10) i++; printf("%s\n",url) > output; url=""; flushed=1
                } else if(c==10) { printf("%s\n",url) > output; url=""; flushed=1
                } else if(c==37) {
                    if(hexDigit($(i+1)) && hexDigit($(i+2))) {
                        url=sprintf("%s%%%c%c",url,$(i+1),$(i+2)); i+=2
                    } else { url=sprintf("%s%%25",url) }
                } else if(c>32 && c<127 && c!=34 && c!=39 && c!=96 && c!=60 && c!=62) {
                    url=sprintf("%s%c",url,c)
                } else { url=sprintf("%s%%%02X",url,c) }
            } if(!flushed) printf("%s\n",url) > output
        }'
    fi
    rm -f "$1".tmp
}

调用replacecodes () -> replacemlentities () --> toUTF8Encoded()

2021-05-08 15:02:38

其中一种变体可能很丑，但很简单:

urlencode() {
    local data
    if [[ $# != 1 ]]; then
        echo "Usage: $0 string-to-urlencode"
        return 1
    fi
    data="$(curl -s -o /dev/null -w %{url_effective} --get --data-urlencode "$1" "")"
    if [[ $? != 3 ]]; then
        echo "Unexpected error" 1>&2
        return 2
    fi
    echo "${data##/?}"
    return 0
}

下面是一个单行版本的例子(由Bruno建议):

date | curl -Gso /dev/null -w %{url_effective} --data-urlencode @- "" | cut -c 3-

# If you experience the trailing %0A, use
date | curl -Gso /dev/null -w %{url_effective} --data-urlencode @- "" | sed -E 's/..(.*).../\1/'

2012-05-29 11:11:23

如何为curl命令urlencode数据?

推荐文章

最新文章

标签