{-# LANGUAGE TypeApplications #-}

module PlutusCore.Generators.QuickCheck.Split where

import Control.Monad
import Data.Bifunctor
import Data.Coerce (coerce)
import Data.List (sortBy)
import Data.Ord (comparing)
import Test.QuickCheck

-- | Up to what length a list is considered \"short\".
smallLength :: Int
smallLength :: Int
smallLength = Int
6

-- | Generate a sublist of the given size of the given list. Preserves the order of elements.
sublistN :: Int -> [a] -> Gen [a]
sublistN :: forall a. Int -> [a] -> Gen [a]
sublistN Int
lenRes =
  ([(Int, a)] -> [a]) -> Gen [(Int, a)] -> Gen [a]
forall a b. (a -> b) -> Gen a -> Gen b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (((Int, a) -> a) -> [(Int, a)] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (Int, a) -> a
forall a b. (a, b) -> b
snd ([(Int, a)] -> [a])
-> ([(Int, a)] -> [(Int, a)]) -> [(Int, a)] -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, a) -> (Int, a) -> Ordering) -> [(Int, a)] -> [(Int, a)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Int, a) -> Int) -> (Int, a) -> (Int, a) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, a) -> Int
forall a b. (a, b) -> a
fst) ([(Int, a)] -> [(Int, a)])
-> ([(Int, a)] -> [(Int, a)]) -> [(Int, a)] -> [(Int, a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [(Int, a)] -> [(Int, a)]
forall a. Int -> [a] -> [a]
take Int
lenRes)
    (Gen [(Int, a)] -> Gen [a])
-> ([a] -> Gen [(Int, a)]) -> [a] -> Gen [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, a)] -> Gen [(Int, a)]
forall a. [a] -> Gen [a]
shuffle
    ([(Int, a)] -> Gen [(Int, a)])
-> ([a] -> [(Int, a)]) -> [a] -> Gen [(Int, a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [a] -> [(Int, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0 :: Int ..]

-- | Calculate the maximum number of chunks to split a list of the given list into.
toMaxChunkNumber :: Int -> Int
toMaxChunkNumber :: Int -> Int
toMaxChunkNumber Int
len
  -- For short lists we take the maximum number of chunks to be the length of the list,
  -- i.e. the maximum number of chunks grows at a maximum speed for short lists.
  | Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
smallLength = Int
len
  -- For longer lists the maximum number of chunks grows slower. We don't really want to split a
  -- 50-element list into each of 1..50 number of chunks.
  | Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
smallLength Int -> Int -> Int
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int) = Int
smallLength Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
smallLength
  -- For long lists it grows even slower.
  | Bool
otherwise = Int
smallLength Int -> Int -> Int
forall a. Num a => a -> a -> a
+ forall a b. (RealFrac a, Integral b) => a -> b
round @Double (Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)

{-| Calculate the number of ways to divide a list of length @len@ into @chunkNum@ chunks.
Equals to @C(len - 1, chunksNum - 1)@. -}
toChunkNumber :: Int -> Int -> Int
toChunkNumber :: Int -> Int -> Int
toChunkNumber Int
len Int
chunkNum =
  [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1, Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2 .. Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
chunkNum Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1]
    Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [Int
chunkNum Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1, Int
chunkNum Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2 .. Int
2]

{-| Return a list of pairs, each of which consists of

1. the frequency at which a chunk length needs to be picked by the generation machinery
2. the chunk length itself

>>> toChunkFrequencies (-1)
[]
>>> toChunkFrequencies 0
[]
>>> toChunkFrequencies 1
[(1,1)]
>>> toChunkFrequencies 5
[(1,1),(4,2),(6,3),(4,4),(1,5)]
>>> toChunkFrequencies 10
[(3,1),(6,2),(9,3),(12,4),(15,5),(18,6),(21,7)]
>>> toChunkFrequencies 50
[(3,1),(4,2),(5,3),(6,4),(7,5),(8,6),(9,7),(10,8),(11,9),(12,10),(13,11),(14,12),(15,13)] -}
toChunkFrequencies :: Int -> [(Int, Int)]
toChunkFrequencies :: Int -> [(Int, Int)]
toChunkFrequencies Int
len
  -- For short lists we calculate exact chunk numbers and use those as frequencies in order to get
  -- uniform distribution of list lengths (which does not lead to uniform distribution of lengths
  -- of subtrees, since subtrees with small total count of elements get generated much more often
  -- than those with a big total count of elements, particularly because the latter contain the
  -- former).
  | Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
smallLength = (Int -> (Int, Int)) -> [Int] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
num -> (Int -> Int -> Int
toChunkNumber Int
len Int
num, Int
num)) [Int]
chunks
  | Bool
otherwise =
      let
        -- The probability of "splitting" a list into a single sublist (i.e. simply 'pure') is
        -- about 3%.
        singleElemProb :: Int
singleElemProb = Int
3
        -- Computing @delta@ in order for each subsequent chunk length to get picked a bit more
        -- likely, so that we generate longer forests more often when we can. For not-too-long
        -- lists the frequencies add up to roughly 100. For long lists the sum of frequencies
        -- can be significantly greater than 100 making the chance of generating a single
        -- sublist less than 3%.
        deltaN :: Int
deltaN = Int
chunkMax Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
chunkMax Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2
        delta :: Int
delta = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ (Int
100 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
chunkMax Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
singleElemProb) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
deltaN
       in
        [Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip ((Int -> Int) -> Int -> [Int]
forall a. (a -> a) -> a -> [a]
iterate (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
delta) Int
singleElemProb) [Int]
chunks
  where
    chunkMax :: Int
chunkMax = Int -> Int
toMaxChunkNumber Int
len
    chunks :: [Int]
chunks = [Int
1 .. Int
chunkMax]

{-| Split the given list in chunks. The length of each chunk, apart from the final one, is taken
from the first argument.

>>> toChunks [3, 1] "abcdef"
["abc","d","ef"] -}
toChunks :: [Int] -> [a] -> [[a]]
toChunks :: forall a. [Int] -> [a] -> [[a]]
toChunks [] [a]
xs = [[a]
xs]
toChunks (Int
n : [Int]
ns) [a]
xs = [a]
chunk [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [Int] -> [a] -> [[a]]
forall a. [Int] -> [a] -> [[a]]
toChunks [Int]
ns [a]
xs'
  where
    ([a]
chunk, [a]
xs') = Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [a]
xs

{-| Split a list into the given number of chunks. Concatenating the resulting lists gives back the
original one. Doesn't generate empty chunks. -}
multiSplit1In :: Int -> [a] -> Gen [NonEmptyList a]
multiSplit1In :: forall a. Int -> [a] -> Gen [NonEmptyList a]
multiSplit1In Int
_ [] = [NonEmptyList a] -> Gen [NonEmptyList a]
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
multiSplit1In Int
chunkNum [a]
xs = do
  let len :: Int
len = [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs
  -- Pick a list of breakpoints.
  [Int]
breakpoints <- Int -> [Int] -> Gen [Int]
forall a. Int -> [a] -> Gen [a]
sublistN (Int
chunkNum Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [Int
1 .. Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
  -- Turn the list of breakpoints into a list of chunk lengths.
  let chunkLens :: [Int]
chunkLens = (Int -> Int -> Int) -> [Int] -> [Int] -> [Int]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (-) [Int]
breakpoints (Int
0 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
breakpoints)
  -- Chop the argument into chunks according to the list of chunk lengths.
  [NonEmptyList a] -> Gen [NonEmptyList a]
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([NonEmptyList a] -> Gen [NonEmptyList a])
-> ([[a]] -> [NonEmptyList a]) -> [[a]] -> Gen [NonEmptyList a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[a]] -> [NonEmptyList a]
forall a b. Coercible a b => a -> b
coerce ([[a]] -> Gen [NonEmptyList a]) -> [[a]] -> Gen [NonEmptyList a]
forall a b. (a -> b) -> a -> b
$ [Int] -> [a] -> [[a]]
forall a. [Int] -> [a] -> [[a]]
toChunks [Int]
chunkLens [a]
xs

{-| Split a list into chunks at random. Concatenating the resulting lists gives back the original
one. Doesn't generate empty chunks. -}
multiSplit1 :: [a] -> Gen [NonEmptyList a]
multiSplit1 :: forall a. [a] -> Gen [NonEmptyList a]
multiSplit1 [a]
xs = do
  -- Pick a number of chunks.
  Int
chunkNum <- [(Int, Gen Int)] -> Gen Int
forall a. HasCallStack => [(Int, Gen a)] -> Gen a
frequency ([(Int, Gen Int)] -> Gen Int)
-> (Int -> [(Int, Gen Int)]) -> Int -> Gen Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Int) -> (Int, Gen Int)) -> [(Int, Int)] -> [(Int, Gen Int)]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Gen Int) -> (Int, Int) -> (Int, Gen Int)
forall a b. (a -> b) -> (Int, a) -> (Int, b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> Gen Int
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure) ([(Int, Int)] -> [(Int, Gen Int)])
-> (Int -> [(Int, Int)]) -> Int -> [(Int, Gen Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [(Int, Int)]
toChunkFrequencies (Int -> Gen Int) -> Int -> Gen Int
forall a b. (a -> b) -> a -> b
$ [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs
  Int -> [a] -> Gen [NonEmptyList a]
forall a. Int -> [a] -> Gen [NonEmptyList a]
multiSplit1In Int
chunkNum [a]
xs

{-| Return the left and the right halves of the given list. The first argument controls whether
the middle element of a list having an odd length goes into the left half or the right one.

>>> halve True [1 :: Int]
([1],[])
>>> halve True [1, 2 :: Int]
([1],[2])
>>> halve True [1, 2, 3 :: Int]
([1,2],[3])
>>> halve False [1 :: Int]
([],[1])
>>> halve False [1, 2 :: Int]
([1],[2])
>>> halve False [1, 2, 3 :: Int]
([1],[2,3]) -}
halve :: Bool -> [a] -> ([a], [a])
halve :: forall a. Bool -> [a] -> ([a], [a])
halve Bool
isOddToLeft [a]
xs0 = [a] -> [a] -> ([a], [a])
forall {a} {a}. [a] -> [a] -> ([a], [a])
go [a]
xs0 [a]
xs0
  where
    go :: [a] -> [a] -> ([a], [a])
go (a
_ : a
_ : [a]
xsFast) (a
x : [a]
xsSlow) = ([a] -> [a]) -> ([a], [a]) -> ([a], [a])
forall a b c. (a -> b) -> (a, c) -> (b, c)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
:) (([a], [a]) -> ([a], [a])) -> ([a], [a]) -> ([a], [a])
forall a b. (a -> b) -> a -> b
$ [a] -> [a] -> ([a], [a])
go [a]
xsFast [a]
xsSlow
    go [a
_] (a
x : [a]
xsSlow) | Bool
isOddToLeft = ([a
x], [a]
xsSlow)
    go [a]
_ [a]
xsSlow = ([], [a]
xsSlow)

{-| Insert a value into a list an arbitrary number of times. The first argument controls whether
to allow inserting at the beginning of the list, the second argument is the probability of
inserting an element at the end of the list. -}
insertManyPreferRight :: forall a. Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferRight :: forall a. Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferRight Bool
keepPrefix Double
lastProb a
y [a]
xs0 = Bool -> Double -> [a] -> Gen [a]
go Bool
keepPrefix Double
initWeight [a]
xs0
  where
    -- The weight of the "insert @y@ operation" operation at the beginning of the list.
    initWeight :: Double
initWeight = Double
10
    -- How more likely we're to insert an element when moving one element forward in the list.
    -- Should we make it dependent on the length of the list? Maybe it's fine.
    scaling :: Double
scaling = Double
1.1
    -- The weight of the "insert @y@ operation" operation at the end of the list.
    topWeight :: Double
topWeight = Double
scaling Double -> Double -> Double
forall a. Floating a => a -> a -> a
** Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs0) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
initWeight
    -- The weight of the "do nothing" operation.
    noopWeight :: Int
noopWeight = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
topWeight Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
lastProb Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1)

    go :: Bool -> Double -> [a] -> Gen [a]
    go :: Bool -> Double -> [a] -> Gen [a]
go Bool
keep Double
weight [a]
xs = do
      Bool
doCons <- [(Int, Gen Bool)] -> Gen Bool
forall a. HasCallStack => [(Int, Gen a)] -> Gen a
frequency [(Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor Double
weight, Bool -> Gen Bool
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True), (Int
noopWeight, Bool -> Gen Bool
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False)]
      if Bool
doCons
        -- If we don't want to insert elements into the head of the list, then we simply ignore
        -- the generated one and carry on. Ugly, but works.
        then ([a
y | Bool
keep] [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++) ([a] -> [a]) -> Gen [a] -> Gen [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bool -> Double -> [a] -> Gen [a]
go Bool
keep Double
weight [a]
xs
        else case [a]
xs of
          [] -> [a] -> Gen [a]
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
          a
x : [a]
xs' -> (a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
:) ([a] -> [a]) -> Gen [a] -> Gen [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bool -> Double -> [a] -> Gen [a]
go Bool
True (Double
weight Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
scaling) [a]
xs'

{-| Insert a value into a list an arbitrary number of times. The first argument controls whether
to allow inserting at the end of the list, the second argument is the probability of
inserting an element at the beginning of the list. -}
insertManyPreferLeft :: Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferLeft :: forall a. Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferLeft Bool
keepSuffix Double
headProb a
y =
  ([a] -> [a]) -> Gen [a] -> Gen [a]
forall a b. (a -> b) -> Gen a -> Gen b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [a] -> [a]
forall a. [a] -> [a]
reverse (Gen [a] -> Gen [a]) -> ([a] -> Gen [a]) -> [a] -> Gen [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Double -> a -> [a] -> Gen [a]
forall a. Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferRight Bool
keepSuffix Double
headProb a
y ([a] -> Gen [a]) -> ([a] -> [a]) -> [a] -> Gen [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [a]
forall a. [a] -> [a]
reverse

{-| Insert a value into a list an arbitrary number of times. The first argument is the probability
of inserting an element at an end of the list (i.e. either the beginning or the end, not
combined). See 'multiSplit1' for what this function allows us to do. -}
insertManyPreferEnds :: Double -> a -> [a] -> Gen [a]
-- Cut the list in half, insert into the left half skewing generation towards the beginning, insert
-- into the right half skewing generation towards the end, then append the results of those two
-- operations, so that we get a list where additional elements are more likely to occur close to
-- the sides.
insertManyPreferEnds :: forall a. Double -> a -> [a] -> Gen [a]
insertManyPreferEnds Double
endProb a
y [a]
xs = do
  -- In order not to get skewed results we sometimes put the middle element of the list into its
  -- first half and sometimes into its second half.
  Bool
isOddToLeft <- Gen Bool
forall a. Arbitrary a => Gen a
arbitrary
  let ([a]
xsL, [a]
xsR) = Bool -> [a] -> ([a], [a])
forall a. Bool -> [a] -> ([a], [a])
halve Bool
isOddToLeft [a]
xs
  -- If the list has even length, then it was cut into two halves of equal length meaning one slot
  -- for to put an element in appears twice: at the end of the left half and at the beginning of
  -- the right one. Hence in order to avoid skeweness we don't put anything into this slot at the
  -- end of the left half.
  -- Maybe we do want to skew generation to favor the middle of the list like we do for its ends,
  -- but then we need to do that intentionally and systematically, not randomly and a little bit.
  [a]
xsL' <- Bool -> Double -> a -> [a] -> Gen [a]
forall a. Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferLeft ([a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xsL Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xsR) Double
endProb a
y [a]
xsL
  [a]
xsR' <- Bool -> Double -> a -> [a] -> Gen [a]
forall a. Bool -> Double -> a -> [a] -> Gen [a]
insertManyPreferRight Bool
True Double
endProb a
y [a]
xsR
  [a] -> Gen [a]
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([a] -> Gen [a]) -> [a] -> Gen [a]
forall a b. (a -> b) -> a -> b
$ [a]
xsL' [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
xsR'

{-| Split a list into chunks at random. Concatenating the resulting lists gives back the original
one. Generates empty chunks. The first argument is the probability of generating at least one
empty chunk as the first element of the resulting list. It is also the probability of generating
an empty chunk as the last element of the resulting list. The probability of generating empty
chunks decreases as we go from either of the ends of the resulting list (this is so that we are
more likely to hit a corner case related to handling elements at the beginning or the end of a
list). -}
multiSplit0 :: Double -> [a] -> Gen [[a]]
multiSplit0 :: forall a. Double -> [a] -> Gen [[a]]
multiSplit0 Double
endProb = [a] -> Gen [NonEmptyList a]
forall a. [a] -> Gen [NonEmptyList a]
multiSplit1 ([a] -> Gen [NonEmptyList a])
-> ([NonEmptyList a] -> Gen [[a]]) -> [a] -> Gen [[a]]
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> Double -> [a] -> [[a]] -> Gen [[a]]
forall a. Double -> a -> [a] -> Gen [a]
insertManyPreferEnds Double
endProb [] ([[a]] -> Gen [[a]])
-> ([NonEmptyList a] -> [[a]]) -> [NonEmptyList a] -> Gen [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [NonEmptyList a] -> [[a]]
forall a b. Coercible a b => a -> b
coerce