在Bash中,测试数组是否包含某个值的最简单方法是什么?


当前回答

保持简单:

Array1=( "item1" "item2" "item3" "item-4" )
var="item3"

count=$(echo ${Array1[@]} | tr ' ' '\n' | awk '$1 == "'"$var"'"{print $0}' | wc -l)
[ $count -eq 0 ] && echo "Not found" || echo "found"

其他回答

以下是几个可能实现的编译,包括集成验证和简单的基准测试(需要Bash >= 4.0):

#!/usr/bin/env bash

# Check if array contains item [$1: item, $2: array name]
function in_array_1() {
    local needle="$1" item
    local -n arrref="$2"
    for item in "${arrref[@]}"; do
        [[ "${item}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2: array name]
function in_array_2() {
    local needle="$1" arrref="$2[@]" item
    for item in "${!arrref}"; do
        [[ "${item}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2: array name]
function in_array_3() {
    local needle="$1" i
    local -n arrref="$2"
    for ((i=0; i < ${#arrref[@]}; i++)); do
        [[ "${arrref[i]}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2..$n: array items]
function in_array_4() {
    local needle="$1" item
    shift
    for item; do
        [[ "${item}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2..$n: array items]
function in_array_5() {
    local needle="$1" item
    for item in "${@:2}"; do
        [[ "${item}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2: array name]
function in_array_6() {
    local needle="$1" arrref="$2[@]" array i
    array=("${!arrref}")
    for ((i=0; i < ${#array[@]}; i++)); do
        [[ "${array[i]}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2..$n: array items]
function in_array_7() {
    local needle="$1" array=("${@:2}") item
    for item in "${array[@]}"; do
        [[ "${item}" == "${needle}" ]] && return 0
    done
    return 1
}

# Check if array contains item [$1: item, $2..$n: array items]
function in_array_8() {
    local needle="$1"
    shift
    while (( $# > 0 )); do
        [[ "$1" == "${needle}" ]] && return 0
        shift
    done
    return 1
}


#------------------------------------------------------------------------------


# Generate map for array [$1: name of source array, $2: name of target array]
# NOTE: target array must be pre-declared by caller using 'declare -A <name>'
function generate_array_map() {
    local -n srcarr="$1" dstmap="$2"
    local i key
    dstmap=()
    for i in "${!srcarr[@]}"; do
        key="${srcarr[i]}"
        [[ -z ${dstmap["${key}"]+set} ]] && dstmap["${key}"]=${i} || dstmap["${key}"]+=,${i}
    done
}

# Check if array contains item [$1: item, $2: name of array map]
function in_array_9() {
    local needle="$1"
    local -n mapref="$2"
    [[ -n "${mapref["${needle}"]+set}" ]] && return 0 || return 1
}


#------------------------------------------------------------------------------


# Test in_array function [$1: function name, $2: function description, $3: test array size]
function test() {
    local tname="$1" tdesc="$2" tn=$3 ti=0 tj=0 ta=() tct=0 tepapre="" tepapost="" tepadiff=()
    local -A tam=()

    echo -e "\e[1m${tname} (${tdesc}):\e[0m"

    # Generate list of currently defined variables
    tepapre="$(compgen -v)"

    # Fill array with random items
    for ((ti=0; ti < ${tn}; ti++)); do
        ta+=("${RANDOM} ${RANDOM} ${RANDOM} ${RANDOM}")
    done

    # Determine function call type (pass array items, pass array name, pass array map)
    case "${tname}" in
        "in_array_1"|"in_array_2"|"in_array_3"|"in_array_6") tct=0; ;;
        "in_array_4"|"in_array_5"|"in_array_7"|"in_array_8") tct=1; ;;
        "in_array_9") generate_array_map ta tam; tct=2; ;;
        *) echo "Unknown in_array function '${tname}', aborting"; return 1; ;;
    esac

    # Verify in_array function is working as expected by picking a few random
    # items and checking
    echo -e "\e[1mVerification...\e[0m"
    for ((ti=0; ti < 10; ti++)); do
        tj=$(( ${RANDOM} % ${#ta[@]} ))
        echo -n "Item ${tj} '${ta[tj]}': "
        if (( ${tct} == 0 )); then
            "${tname}" "${ta[tj]}" ta && echo -en "\e[1;32mok\e[0m" || echo -en "\e[1;31mnok\e[0m"
            echo -n " "
            "${tname}" "${ta[tj]}.x" ta && echo -en "\e[1;31mnok\e[0m" || echo -en "\e[1;32mok\e[0m"
        elif (( ${tct} == 1 )); then
            "${tname}" "${ta[tj]}" "${ta[@]}" && echo -en "\e[1;32mok\e[0m" || echo -en "\e[1;31mnok\e[0m"
            echo -n " "
            "${tname}" "${ta[tj]}.x" "${ta[@]}" && echo -en "\e[1;31mnok\e[0m" || echo -en "\e[1;32mok\e[0m"
        elif (( ${tct} == 2 )); then
            "${tname}" "${ta[tj]}" tam && echo -en "\e[1;32mok\e[0m" || echo -en "\e[1;31mnok\e[0m"
            echo -n " "
            "${tname}" "${ta[tj]}.x" tam && echo -en "\e[1;31mnok\e[0m" || echo -en "\e[1;32mok\e[0m"
        fi
        echo
    done

    # Benchmark in_array function
    echo -en "\e[1mBenchmark...\e[0m"
    time for ((ti=0; ti < ${#ta[@]}; ti++)); do
        if (( ${tct} == 0 )); then
            "${tname}" "${ta[ti]}" ta
        elif (( ${tct} == 1 )); then
            "${tname}" "${ta[ti]}" "${ta[@]}"
        elif (( ${tct} == 2 )); then
            "${tname}" "${ta[ti]}" tam
        fi
    done

    # Generate list of currently defined variables, compare to previously
    # generated list to determine possible environment pollution
    echo -e "\e[1mEPA test...\e[0m"
    tepapost="$(compgen -v)"
    readarray -t tepadiff < <(echo -e "${tepapre}\n${tepapost}" | sort | uniq -u)
    if (( ${#tepadiff[@]} == 0 )); then
        echo -e "\e[1;32mclean\e[0m"
    else
        echo -e "\e[1;31mpolluted:\e[0m ${tepadiff[@]}"
    fi

    echo
}


#------------------------------------------------------------------------------


# Test in_array functions
n=5000
echo
( test in_array_1 "pass array name, nameref reference, for-each-loop over array items" ${n} )
( test in_array_2 "pass array name, indirect reference, for-each-loop over array items" ${n} )
( test in_array_3 "pass array name, nameref reference, c-style for-loop over array items by index" ${n} )
( test in_array_4 "pass array items, for-each-loop over arguments" ${n} )
( test in_array_5 "pass array items, for-each-loop over arguments as array" ${n} )
( test in_array_6 "pass array name, indirect reference + array copy, c-style for-loop over array items by index" ${n} )
( test in_array_7 "pass array items, copy array from arguments as array, for-each-loop over array items" ${n} )
( test in_array_8 "pass array items, while-loop, shift over arguments" ${n} )
( test in_array_9 "pre-generated array map, pass array map name, direct test without loop" ${n} )

结果:

in_array_1 (pass array name, nameref reference, for-each-loop over array items):
Verification...
Item 862 '19528 10140 12669 17820': ok ok
Item 2250 '27262 30442 9295 24867': ok ok
Item 4794 '3857 17404 31925 27993': ok ok
Item 2532 '14553 12282 26511 32657': ok ok
Item 1911 '21715 8066 15277 27126': ok ok
Item 4289 '3081 10265 16686 19121': ok ok
Item 4837 '32220 1758 304 7871': ok ok
Item 901 '20652 23880 20634 14286': ok ok
Item 2488 '14578 8625 30251 9343': ok ok
Item 4165 '4514 25064 29301 7400': ok ok
Benchmark...
real    1m11,796s
user    1m11,262s
sys     0m0,473s
EPA test...
clean

in_array_2 (pass array name, indirect reference, for-each-loop over array items):
Verification...
Item 2933 '17482 25789 27710 2096': ok ok
Item 3584 '876 14586 20885 8567': ok ok
Item 872 '176 19749 27265 18038': ok ok
Item 595 '6597 31710 13266 8813': ok ok
Item 748 '569 9200 28914 11297': ok ok
Item 3791 '26477 13218 30172 31532': ok ok
Item 2900 '3059 8457 4879 16634': ok ok
Item 676 '23511 686 589 7265': ok ok
Item 2248 '31351 7961 17946 24782': ok ok
Item 511 '8484 23162 11050 426': ok ok
Benchmark...
real    1m11,524s
user    1m11,086s
sys     0m0,437s
EPA test...
clean

in_array_3 (pass array name, nameref reference, c-style for-loop over array items by index):
Verification...
Item 1589 '747 10250 20133 29230': ok ok
Item 488 '12827 18892 31996 1977': ok ok
Item 801 '19439 25243 24485 24435': ok ok
Item 2588 '17193 18893 21610 9302': ok ok
Item 4436 '7100 655 8847 3068': ok ok
Item 2620 '19444 6457 28835 24717': ok ok
Item 4398 '4420 16336 612 4255': ok ok
Item 2430 '32397 2402 12631 29774': ok ok
Item 3419 '906 5361 32752 7698': ok ok
Item 356 '9776 16485 20838 13330': ok ok
Benchmark...
real    1m17,037s
user    1m17,019s
sys     0m0,005s
EPA test...
clean

in_array_4 (pass array items, for-each-loop over arguments):
Verification...
Item 1388 '7932 15114 4025 15625': ok ok
Item 3900 '23863 25328 5632 2752': ok ok
Item 2678 '31296 4216 17485 8874': ok ok
Item 1893 '16952 29047 29104 23384': ok ok
Item 1616 '19543 5999 4485 22929': ok ok
Item 93 '14456 2806 12829 19552': ok ok
Item 265 '30961 19733 11863 3101': ok ok
Item 4615 '10431 9566 25767 13518': ok ok
Item 576 '11726 15104 11116 74': ok ok
Item 3829 '19371 25026 6252 29478': ok ok
Benchmark...
real    1m30,912s
user    1m30,740s
sys     0m0,011s
EPA test...
clean

in_array_5 (pass array items, for-each-loop over arguments as array):
Verification...
Item 1012 '29213 31971 21483 30225': ok ok
Item 2802 '4079 5423 29240 29619': ok ok
Item 473 '6968 798 23936 6852': ok ok
Item 2183 '20734 4521 30800 2126': ok ok
Item 3059 '14952 9918 15695 19309': ok ok
Item 1424 '25784 28380 14555 21893': ok ok
Item 1087 '16345 19823 26210 20083': ok ok
Item 257 '28890 5198 7251 3866': ok ok
Item 3986 '29035 19288 12107 3857': ok ok
Item 2509 '9219 32484 12842 27472': ok ok
Benchmark...
real    1m53,485s
user    1m53,404s
sys     0m0,077s
EPA test...
clean

in_array_6 (pass array name, indirect reference + array copy, c-style for-loop over array items by index):
Verification...
Item 4691 '25498 10521 20673 14948': ok ok
Item 263 '25265 29824 3876 14088': ok ok
Item 2550 '2416 14274 12594 29740': ok ok
Item 2269 '2769 11436 3622 28273': ok ok
Item 3246 '23730 25956 3514 17626': ok ok
Item 1059 '10776 12514 27222 15640': ok ok
Item 53 '23813 13365 16022 4092': ok ok
Item 1503 '6593 23540 10256 17818': ok ok
Item 2452 '12600 27404 30960 26759': ok ok
Item 2526 '21190 32512 23651 7865': ok ok
Benchmark...
real    1m54,793s
user    1m54,326s
sys     0m0,457s
EPA test...
clean

in_array_7 (pass array items, copy array from arguments as array, for-each-loop over array items):
Verification...
Item 2212 '12127 12828 27570 7051': ok ok
Item 1393 '19552 26263 1067 23332': ok ok
Item 506 '18818 8253 14924 30710': ok ok
Item 789 '9803 1886 17584 32686': ok ok
Item 1795 '19788 27842 28044 3436': ok ok
Item 376 '4372 16953 17280 4031': ok ok
Item 4846 '19130 6261 21959 6869': ok ok
Item 2064 '2357 32221 22682 5814': ok ok
Item 4866 '10928 10632 19175 14984': ok ok
Item 1294 '8499 11885 5900 6765': ok ok
Benchmark...
real    2m35,012s
user    2m33,578s
sys     0m1,433s
EPA test...
clean

in_array_8 (pass array items, while-loop, shift over arguments):
Verification...
Item 134 '1418 24798 20169 9501': ok ok
Item 3986 '12160 12021 29794 29236': ok ok
Item 1607 '26633 14260 18227 898': ok ok
Item 2688 '18387 6285 2385 18432': ok ok
Item 603 '1421 306 6102 28735': ok ok
Item 625 '4530 19718 30900 1938': ok ok
Item 4033 '9968 24093 25080 8179': ok ok
Item 310 '6867 9884 31231 29173': ok ok
Item 661 '3794 4745 26066 22691': ok ok
Item 4129 '3039 31766 6714 4921': ok ok
Benchmark...
real    5m51,097s
user    5m50,566s
sys     0m0,495s
EPA test...
clean

in_array_9 (pre-generated array map, pass array map name, direct test without loop):
Verification...
Item 3696 '661 6048 13881 26901': ok ok
Item 815 '29729 13733 3935 20697': ok ok
Item 1076 '9220 3405 18448 7240': ok ok
Item 595 '8912 2886 13678 24066': ok ok
Item 2803 '13534 23891 5344 652': ok ok
Item 1810 '12528 32150 7050 1254': ok ok
Item 4055 '21840 7436 1350 15443': ok ok
Item 2416 '19550 28434 17110 31203': ok ok
Item 1630 '21054 2819 7527 953': ok ok
Item 1044 '30152 22211 22226 6950': ok ok
Benchmark...
real    0m0,128s
user    0m0,128s
sys     0m0,000s
EPA test...
clean

别胡闹了!使您的解决方案简单、干净和可重用。

这些函数负责索引数组和关联数组。可以通过将搜索算法从线性搜索升级为二进制搜索(用于大型数据集)来改进它们。

##
# Determines if a value exists in an array.
###
function hasArrayValue ()
{
    local -r needle="{$1:?}"
    local -nr haystack="{$2:?}"  # Where you pass by reference to get the entire array in one argument.

    # Linear search. Upgrade to binary search for large datasets.
    for value in "${haystack[@]}"; do
        if [[ "$value" == "$needle" ]]; then
            return 0
        fi
    done

    return 1
}

##
# Determines if a value exists in an associative array / map.
###
function hasMapValue ()
{
    local -r needle="{$1:?}"
    local -nr haystack="{$2:?}"

    # Linear search. Upgrade to binary search for large datasets.
    for value in "${haystack[@]}"; do
        if [[ $value == $needle ]]; then
            return 0
        fi
    done

    return 1
}

是的,同样的逻辑,但在处理bash时,如果函数的名称可以让您知道迭代的对象(或不迭代的对象),则可能(可能)有用。

这对我来说很管用:

# traditional system call return values-- used in an `if`, this will be true when returning 0. Very Odd.
contains () {
    # odd syntax here for passing array parameters: http://stackoverflow.com/questions/8082947/how-to-pass-an-array-to-a-bash-function
    local list=$1[@]
    local elem=$2

    # echo "list" ${!list}
    # echo "elem" $elem

    for i in "${!list}"
    do
        # echo "Checking to see if" "$i" "is the same as" "${elem}"
        if [ "$i" == "${elem}" ] ; then
            # echo "$i" "was the same as" "${elem}"
            return 0
        fi
    done

    # echo "Could not find element"
    return 1
}

示例调用:

arr=("abc" "xyz" "123")
if contains arr "abcx"; then
    echo "Yes"
else
    echo "No"
fi
a=(b c d)

if printf '%s\0' "${a[@]}" | grep -Fqxz c
then
  echo 'array “a” contains value “c”'
fi

如果你喜欢,你可以使用相同的长选项:

--fixed-strings --quiet --line-regexp --null-data
: NeedleInArgs "$needle" "${haystack[@]}"
: NeedleInArgs "$needle" arg1 arg2 .. argN
NeedleInArgs()
{
local a b;
printf -va '\n%q\n' "$1";
printf -vb '%q\n' "${@:2}";
case $'\n'"$b" in (*"$a"*) return 0;; esac;
return 1;
}

使用:

NeedleInArgs "$needle" "${haystack[@]}" && echo "$needle" found || echo "$needle" not found;

对于bash v3.1及以上版本(printf -v支持) 没有分叉,也没有外部程序 没有循环(除了bash中的内部扩展) 适用于所有可能的值和数组,没有异常,没有什么可担心的

也可以直接使用,比如:

if      NeedleInArgs "$input" value1 value2 value3 value4;
then
        : input from the list;
else
        : input not from list;
fi;

对于从v20.5 b到v3.0的bash, printf缺少-v,因此需要额外的2个fork(但不需要执行,因为printf是bash内置的):

NeedleInArgs()
{
case $'\n'"`printf '%q\n' "${@:2}"`" in
(*"`printf '\n%q\n' "$1"`"*) return 0;;
esac;
return 1;
}

注意,我测试了时间:

check call0:  n: t4.43 u4.41 s0.00 f: t3.65 u3.64 s0.00 l: t4.91 u4.90 s0.00 N: t5.28 u5.27 s0.00 F: t2.38 u2.38 s0.00 L: t5.20 u5.20 s0.00
check call1:  n: t3.41 u3.40 s0.00 f: t2.86 u2.84 s0.01 l: t3.72 u3.69 s0.02 N: t4.01 u4.00 s0.00 F: t1.15 u1.15 s0.00 L: t4.05 u4.05 s0.00
check call2:  n: t3.52 u3.50 s0.01 f: t3.74 u3.73 s0.00 l: t3.82 u3.80 s0.01 N: t2.67 u2.67 s0.00 F: t2.64 u2.64 s0.00 L: t2.68 u2.68 s0.00

Call0和call1是对另一个快速pure-bash变体调用的不同变体 Call2在这里。 N=notfound F=firstmatch L=lastmatch 小写字母为短数组,大写字母为长数组

正如您所看到的,这里的这个变体有一个非常稳定的运行时,所以它不太依赖于匹配位置。运行时主要由数组长度决定。搜索变量的运行时高度依赖于匹配位置。所以在边缘情况下,这个变体可以(快得多)。

但非常重要的是,搜索变量的RAM效率更高,因为这里的这个变量总是将整个数组转换为一个大字符串。

所以如果你的内存很紧,你希望大部分比赛都是早期的,那么就不要在这里使用这个。但是,如果您想要一个可预测的运行时,有很长的数组来匹配(期望延迟或根本不匹配),并且双RAM使用也不是太大的问题,那么这里有一些优势。

定时测试脚本:

in_array()
{
    local needle="$1" arrref="$2[@]" item
    for item in "${!arrref}"; do
        [[ "${item}" == "${needle}" ]] && return 0
    done
    return 1
}

NeedleInArgs()
{
local a b;
printf -va '\n%q\n' "$1";
printf -vb '%q\n' "${@:2}";
case $'\n'"$b" in (*"$a"*) return 0;; esac;
return 1;
}

loop1() { for a in {1..100000}; do "$@"; done }
loop2() { for a in {1..1000}; do "$@"; done }

run()
{
  needle="$5"
  arr=("${@:6}")

  out="$( ( time -p "loop$2" "$3" ) 2>&1 )"

  ret="$?"
  got="${out}"
  syst="${got##*sys }"
  got="${got%"sys $syst"}"
  got="${got%$'\n'}"
  user="${got##*user }"
  got="${got%"user $user"}"
  got="${got%$'\n'}"
  real="${got##*real }"
  got="${got%"real $real"}"
  got="${got%$'\n'}"
  printf ' %s: t%q u%q s%q' "$1" "$real" "$user" "$syst"
  [ -z "$rest" ] && [ "$ret" = "$4" ] && return
  printf 'FAIL! expected %q got %q\n' "$4" "$ret"
  printf 'call:   %q\n' "$3"
  printf 'out:    %q\n' "$out"
  printf 'rest:   %q\n' "$rest"
  printf 'needle: %q\n' "$5"
  printf 'arr:   '; printf ' %q' "${@:6}"; printf '\n'
  exit 1
}

check()
{
  printf 'check %q: ' "$1"
  run n 1 "$1" 1 needle a b c d
  run f 1 "$1" 0 needle needle a b c d
  run l 1 "$1" 0 needle a b c d needle
  run N 2 "$1" 1 needle "${rnd[@]}"
  run F 2 "$1" 0 needle needle "${rnd[@]}"
  run L 2 "$1" 0 needle "${rnd[@]}" needle
  printf '\n'
}

call0() { chk=("${arr[@]}"); in_array "$needle" chk; }
call1() { in_array "$needle" arr; }
call2() { NeedleInArgs "$needle" "${arr[@]}"; }

rnd=()
for a in {1..1000}; do rnd+=("$a"); done

check call0
check call1
check call2