# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Copyright (c) 2006-2020, Salvatore Sanfilippo
# See bundled license file licenses/LICENSE.redis for details.

# This file is copied and modified from the Redis project,
# which started out as: https://github.com/redis/redis/blob/dbcc0a8/tests/unit/hyperloglog.tcl

start_server {tags {"hll"}} {
    # Issues PFSELFTEST. catch stores either the reply or the error message
    # in the variable, and that value is the result of the test body.
    test {HyperLogLog self test passes} {
        catch {r pfselftest} selftest_outcome
        set selftest_outcome
    } {OK}

    # PFADD is sent with a key and no elements; the EXISTS reply for that key
    # is the result of the test body.
    test {PFADD without arguments creates an HLL value} {
        r pfadd hll
        set key_exists [r exists hll]
        set key_exists
    } {1}

    # Reads PFCOUNT for the key `hll`; this block does not create the key, so
    # it relies on an earlier test having done so.
    test {Approximated cardinality after creation is zero} {
        set initial_count [r pfcount hll]
        set initial_count
    } {0}

    # The PFADD reply for elements a, b and c is the result of the test body.
    test {PFADD returns 1 when at least 1 reg was modified} {
        set pfadd_reply [r pfadd hll a b c]
        set pfadd_reply
    } {1}

    # Sends PFADD with elements a, b and c; per the test name these are
    # expected to be present already, so this block depends on the state left
    # in key `hll` by an earlier test.
    test {PFADD returns 0 when no reg was modified} {
        set pfadd_reply [r pfadd hll a b c]
        set pfadd_reply
    } {0}

    # Adds the empty string as an element. No expected value is given to
    # `test`; presumably the test passes as long as the command does not
    # raise an error -- confirm against the harness.
    test {PFADD works with empty string (regression)} {
        set pfadd_reply [r pfadd hll ""]
        set pfadd_reply
    }

    # Note that the self test exercises the cardinality estimation
    # error much more thoroughly. Here we are only testing the
    # command implementation itself.
    test {PFCOUNT returns approximated cardinality of set} {
        # Start from a fresh key so the counts below are not affected by
        # earlier tests.
        r del hll

        r pfadd hll 1 2 3 4 5
        set first_count [r pfcount hll]

        # Add more elements and count again: the second PFCOUNT has to
        # reflect them, which tests cached value invalidation.
        r pfadd hll 6 7 8 8 9 10
        set second_count [r pfcount hll]

        # The result of the test body is the two counts as a list.
        list $first_count $second_count
    } {5 10}

    # Feeds random elements to one key in batches of 100 until 100000 have
    # been added. After every batch the PFCOUNT estimate must differ from the
    # number of elements added by less than 5% of the estimate, and PFDEBUG
    # ENCODING must report sparse below 1000 elements and dense above 10000.
    # Nothing is asserted about the encoding in between.
    test {HyperLogLogs are promote from sparse to dense} {
        r del hll
        r config set hll-sparse-max-bytes 3000
        set n 0
        while {$n < 100000} {
            set elements {}
            for {set j 0} {$j < 100} {incr j} {
                # Braced expression: handed to expr as-is, so it is parsed
                # once and never goes through a second round of substitution.
                lappend elements [expr {rand()}]
            }
            incr n 100
            r pfadd hll {*}$elements
            set card [r pfcount hll]
            set err [expr {abs($card-$n)}]
            assert {$err < (double($card)/100)*5}
            if {$n < 1000} {
                assert {[r pfdebug encoding hll] eq {sparse}}
            } elseif {$n > 10000} {
                assert {[r pfdebug encoding hll] eq {dense}}
            }
        }
    }

    # Runs 1000 rounds. Each round adds the same random elements (the count
    # is drawn with `randomInt 100`) to two keys: hll1, which is left as the
    # server created it, and hll2, which is converted with PFDEBUG TODENSE
    # before the elements are added. The round then asserts the two reported
    # encodings and that both keys give the same PFCOUNT.
    test {HyperLogLog sparse encoding stress test} {
        for {set x 0} {$x < 1000} {incr x} {
            r del hll1 hll2
            set numele [randomInt 100]
            set elements {}
            for {set j 0} {$j < $numele} {incr j} {
                # Braced expression: handed to expr as-is, so it is parsed
                # once and never goes through a second round of substitution.
                lappend elements [expr {rand()}]
            }
            # Force dense representation of hll2
            r pfadd hll2
            r pfdebug todense hll2
            r pfadd hll1 {*}$elements
            r pfadd hll2 {*}$elements
            assert {[r pfdebug encoding hll1] eq {sparse}}
            assert {[r pfdebug encoding hll2] eq {dense}}
            # Cardinality estimated should match exactly.
            assert {[r pfcount hll1] eq [r pfcount hll2]}
        }
    }

    # Appends extra bytes to a freshly created HLL value with APPEND and
    # expects the following PFCOUNT to answer with an INVALIDOBJ error.
    test {Corrupted sparse HyperLogLogs are detected: Additional at tail} {
        r del hll
        r pfadd hll a b c
        r append hll "hello"
        # catch always assigns the reply or the error message to the
        # variable, so no prior initialisation is needed.
        catch {r pfcount hll} pfcount_outcome
        set pfcount_outcome
    } {*INVALIDOBJ*}

    # Overwrites the first four bytes of the stored value (the "magic",
    # going by the test name) and expects PFCOUNT to answer with WRONGTYPE.
    test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
        r del hll
        r pfadd hll a b c
        r setrange hll 0 "0123"
        # catch always assigns the reply or the error message to the
        # variable, so no prior initialisation is needed.
        catch {r pfcount hll} pfcount_outcome
        set pfcount_outcome
    } {*WRONGTYPE*}

    # Overwrites the byte at offset 4 with "x" and expects PFCOUNT to answer
    # with WRONGTYPE. Presumably offset 4 holds the encoding field of the HLL
    # header -- confirm against the server's header layout.
    test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
        r del hll
        r pfadd hll a b c
        r setrange hll 4 "x"
        # catch always assigns the reply or the error message to the
        # variable, so no prior initialisation is needed.
        catch {r pfcount hll} pfcount_outcome
        set pfcount_outcome
    } {*WRONGTYPE*}

    # Overwrites the byte at offset 4 with 0x00 and expects PFCOUNT to answer
    # with WRONGTYPE. Going by the test name, this presumably marks the value
    # as dense while its length is still that of the value created above --
    # confirm against the server's header layout.
    test {Corrupted dense HyperLogLogs are detected: Wrong length} {
        r del hll
        r pfadd hll a b c
        r setrange hll 4 "\x00"
        # catch always assigns the reply or the error message to the
        # variable, so no prior initialisation is needed.
        catch {r pfcount hll} pfcount_outcome
        set pfcount_outcome
    } {*WRONGTYPE*}

    # 1000 rounds of: build an HLL from a random number of random elements,
    # overwrite one to five randomly chosen bytes of the stored value, then
    # run PFCOUNT on the result. Nothing is asserted about the PFCOUNT reply.
    test {Fuzzing dense/sparse encoding: Redis should always detect errors} {
        for {set round 0} {$round < 1000} {incr round} {
            r del hll
            set numitems [randomInt 3000]
            set items {}
            while {[llength $items] < $numitems} {
                lappend items [expr {rand()}]
            }
            r pfadd hll {*}$items

            # Corrupt it in some random way: each pass replaces the byte at a
            # random position with a single character from `randstring`.
            set overwrites 0
            while {$overwrites < 5} {
                set pos [randomInt [r strlen hll]]
                r setrange hll $pos [randstring 1 1 binary]
                incr overwrites
                # Stop corrupting further bytes 50% of the time.
                if {rand() < 0.5} break
            }

            # Use the corrupted value to check that it does not crash the
            # server; both a reply and an error reply are acceptable, so the
            # outcome of catch is discarded.
            catch {r pfcount hll}
        }
    }

    # Stores a plain string under `foo`, then checks that each HLL command
    # touching that key (as source or as destination) fails with WRONGTYPE.
    test {PFADD, PFCOUNT, PFMERGE type checking works} {
        r set foo bar
        foreach hll_command {
            {pfadd foo 1}
            {pfcount foo}
            {pfmerge bar foo}
            {pfmerge foo bar}
        } {
            # catch stores the error message (or the reply, should the
            # command unexpectedly succeed) for the pattern check below.
            catch {r {*}$hll_command} outcome
            assert_match {*WRONGTYPE*} $outcome
        }
    }

    # Three overlapping three-element sets whose union is {a b c d e} are
    # merged into `hll`; the PFCOUNT of the merged key is the test result.
    test {PFMERGE results on the cardinality of union of sets} {
        r del hll hll1 hll2 hll3
        foreach {source_key members} {
            hll1 {a b c}
            hll2 {b c d}
            hll3 {c d e}
        } {
            r pfadd $source_key {*}$members
        }
        r pfmerge hll hll1 hll2 hll3
        r pfcount hll
    } {5}

    # For x = 1..9999, one new element with a key-specific prefix is added to
    # each of three keys, so the three keys never share an element and the
    # real union size is 3*x. After every step the multi-key PFCOUNT must
    # differ from that by less than 5% of the reported count.
    test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
        r del hll1 hll2 hll3
        set x 1
        while {$x < 10000} {
            foreach {key prefix} {hll1 foo hll2 bar hll3 zap} {
                r pfadd $key "$prefix-$x"
            }

            set realcard [expr {$x*3}]
            set card [r pfcount hll1 hll2 hll3]
            set err [expr {abs($card-$realcard)}]
            assert {$err < (double($card)/100)*5}
            incr x
        }
    }

    # Adds values drawn with `randomInt 20000` to three keys (9999 draws per
    # key, interleaved), so the keys overlap. The multi-key PFCOUNT must
    # differ from the exact number of distinct values drawn by less than 5%
    # of the reported count.
    test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
        r del hll1 hll2 hll3
        # Every drawn value becomes a dictionary key, so the dictionary size
        # is the exact number of distinct values.
        set distinct {}
        for {set x 1} {$x < 10000} {incr x} {
            foreach key {hll1 hll2 hll3} {
                set rint [randomInt 20000]
                r pfadd $key $rint
                dict set distinct $rint 1
            }
        }
        set realcard [dict size $distinct]
        set card [r pfcount hll1 hll2 hll3]
        set err [expr {abs($card-$realcard)}]
        assert {$err < (double($card)/100)*5}
    }

    # The test result is the number of items in the PFDEBUG GETREG reply,
    # which is expected to be 16384.
    test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
        r del hll
        r pfadd hll 1 2 3
        set registers [r pfdebug getreg hll]
        llength $registers
    } {16384}

    # Inspects byte 15 of the stored value after each step to observe the
    # state of the cached cardinality.
    # NOTE(review): presumably byte 15 is the last byte of the cached
    # cardinality field in the HLL header and its high bit (0x80) marks the
    # cache as stale -- confirm against the server's header layout.
    test {PFADD / PFCOUNT cache invalidation works} {
        r del hll
        r pfadd hll a b c
        # PFCOUNT runs before the first check, which expects byte 15 to be
        # 0x00 afterwards.
        r pfcount hll
        assert {[r getrange hll 15 15] eq "\x00"}
        # Re-adding the same elements must leave byte 15 at 0x00.
        r pfadd hll a b c
        assert {[r getrange hll 15 15] eq "\x00"}
        # Adding different elements must change byte 15 to 0x80.
        r pfadd hll 1 2 3
        assert {[r getrange hll 15 15] eq "\x80"}
    }
}
