-- editorconfig-checker-disable-file

{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE MagicHash         #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Implementations for CIP-121, CIP-122 and CIP-123. Grouped because they all operate on
-- 'ByteString's, and require similar functionality.
module PlutusCore.Bitwise (
  integerToByteString,
  byteStringToInteger,
  andByteString,
  orByteString,
  xorByteString,
  complementByteString,
  shiftByteString,
  rotateByteString,
  readBit,
  writeBits,
  replicateByte,
  countSetBits,
  findFirstSetBit,
  IntegerToByteStringError (..),
  maximumOutputLength
  ) where

import PlutusCore.Builtin (BuiltinResult, emit)
import PlutusCore.Evaluation.Result (evaluationFailure)

import ByteString.StrictBuilder (Builder)
import ByteString.StrictBuilder qualified as Builder
import Control.Exception (Exception, throwIO, try)
import Control.Monad (guard, unless, when)
import Data.Bits (unsafeShiftL, unsafeShiftR, (.|.))
import Data.Bits qualified as Bits
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.Internal qualified as BSI
import Data.Foldable (for_)
import Data.Text (pack)
import Data.Word (Word64, Word8)
import Foreign.Marshal.Utils (copyBytes, fillBytes)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (peekByteOff, peekElemOff, pokeByteOff, pokeElemOff)
import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian))
import GHC.Exts (Int (I#))
import GHC.Integer.Logarithms (integerLog2#)
import GHC.IO.Unsafe (unsafeDupablePerformIO)

{- Note [Input length limitation for IntegerToByteString].
We make `integerToByteString` and `replicateByte` fail if they're called with arguments which would
cause the length of the result to exceed about 8K bytes because the execution time becomes difficult
to predict accurately beyond this point (benchmarks on a number of different machines show that the
CPU time increases smoothly for inputs up to about 8K then increases sharply, becoming chaotic after
about 14K).  This restriction may be removed once a more efficient implementation becomes available,
which may happen when we no longer have to support GHC 8.10. -}
{- NB: if we do relax the length restriction then we will need two variants of integerToByteString in
   Plutus Core so that we can continue to support the current behaviour for old scripts.-}
maximumOutputLength :: Integer
maximumOutputLength :: Integer
maximumOutputLength = Integer
8192

{- Return the base 2 logarithm of an integer, returning 0 for inputs that aren't
   strictly positive.  This is essentially copied from GHC.Num.Integer, which
   has integerLog2 but only in GHC >= 9.0. We should use the library function
   instead when we stop supporting 8.10. -}
integerLog2 :: Integer -> Int
integerLog2 :: Integer -> Int
integerLog2 !Integer
i = Int# -> Int
I# (Integer -> Int#
integerLog2# Integer
i)

-- | Wrapper for 'unsafeIntegerToByteString' to make it more convenient to define as a builtin.
integerToByteString :: Bool -> Integer -> Integer -> BuiltinResult ByteString
integerToByteString :: Bool -> Integer -> Integer -> BuiltinResult ByteString
integerToByteString Bool
endiannessArg Integer
lengthArg Integer
input
  -- Check that the length is non-negative.
  | Integer
lengthArg Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
0 = do
      Text -> BuiltinResult ()
emit Text
"integerToByteString: negative length argument"
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Length requested: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
input)
      BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
  -- Check that the requested length does not exceed the limit.  *NB*: if we remove the limit we'll
  -- still have to make sure that the length fits into an Int.
  | Integer
lengthArg Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
maximumOutputLength = do
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ())
-> ([Char] -> Text) -> [Char] -> BuiltinResult ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Text
pack ([Char] -> BuiltinResult ()) -> [Char] -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ [Char]
"integerToByteString: requested length is too long (maximum is "
               [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Integer -> [Char]
forall a. Show a => a -> [Char]
show Integer
maximumOutputLength
               [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" bytes)"
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Length requested: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
lengthArg)
      BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
  -- If the requested length is zero (ie, an explicit output size is not
  -- specified) we still have to make sure that the output won't exceed the size
  -- limit.  If the requested length is nonzero and less than the limit,
  -- integerToByteString checks that the input fits.
  | Integer
lengthArg Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 -- integerLog2 n is one less than the number of significant bits in n
       Bool -> Bool -> Bool
&& Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Int
integerLog2 Integer
input) Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
>= Integer
8 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
maximumOutputLength =
    let bytesRequiredFor :: Integer -> Int
bytesRequiredFor Integer
n = Integer -> Int
integerLog2 Integer
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
        -- ^ This gives 1 instead of 0 for n=0, but we'll never get that.
    in do
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ())
-> ([Char] -> Text) -> [Char] -> BuiltinResult ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Text
pack ([Char] -> BuiltinResult ()) -> [Char] -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ [Char]
"integerToByteString: input too long (maximum is 2^"
               [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer
8 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
maximumOutputLength)
               [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"-1)"
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Length required: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Int -> [Char]) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Char]
forall a. Show a => a -> [Char]
show (Int -> Text) -> Int -> Text
forall a b. (a -> b) -> a -> b
$ Integer -> Int
bytesRequiredFor Integer
input)
      BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
  | Bool
otherwise = let endianness :: ByteOrder
endianness = Bool -> ByteOrder
endiannessArgToByteOrder Bool
endiannessArg in
    -- We use fromIntegral here, despite advice to the contrary in general when defining builtin
    -- denotations. This is because, if we've made it this far, we know that overflow or truncation
    -- are impossible: we've checked that whatever we got given fits inside a (non-negative) Int.
    case ByteOrder
-> Int -> Integer -> Either IntegerToByteStringError ByteString
unsafeIntegerToByteString ByteOrder
endianness (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
lengthArg) Integer
input of
      Left IntegerToByteStringError
err -> case IntegerToByteStringError
err of
        IntegerToByteStringError
NegativeInput -> do
          Text -> BuiltinResult ()
emit Text
"integerToByteString: cannot convert negative Integer"
          -- This does work proportional to the size of input. However, we're in a failing case
          -- anyway, and the user's paid for work proportional to this size in any case.
          Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Input: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
input)
          BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
        IntegerToByteStringError
NotEnoughDigits -> do
          Text -> BuiltinResult ()
emit Text
"integerToByteString: cannot represent Integer in given number of bytes"
          -- This does work proportional to the size of input. However, we're in a failing case
          -- anyway, and the user's paid for work proportional to this size in any case.
          Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Input: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
input)
          Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Bytes requested: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
lengthArg)
          BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
      Right ByteString
result -> ByteString -> BuiltinResult ByteString
forall a. a -> BuiltinResult a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
result

-- | Conversion from 'Integer' to 'ByteString', as per
-- [CIP-121](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0121).
--

-- | Structured type to help indicate conversion errors.
data IntegerToByteStringError =
  NegativeInput |
  NotEnoughDigits
  deriving stock (IntegerToByteStringError -> IntegerToByteStringError -> Bool
(IntegerToByteStringError -> IntegerToByteStringError -> Bool)
-> (IntegerToByteStringError -> IntegerToByteStringError -> Bool)
-> Eq IntegerToByteStringError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: IntegerToByteStringError -> IntegerToByteStringError -> Bool
== :: IntegerToByteStringError -> IntegerToByteStringError -> Bool
$c/= :: IntegerToByteStringError -> IntegerToByteStringError -> Bool
/= :: IntegerToByteStringError -> IntegerToByteStringError -> Bool
Eq, Int -> IntegerToByteStringError -> [Char] -> [Char]
[IntegerToByteStringError] -> [Char] -> [Char]
IntegerToByteStringError -> [Char]
(Int -> IntegerToByteStringError -> [Char] -> [Char])
-> (IntegerToByteStringError -> [Char])
-> ([IntegerToByteStringError] -> [Char] -> [Char])
-> Show IntegerToByteStringError
forall a.
(Int -> a -> [Char] -> [Char])
-> (a -> [Char]) -> ([a] -> [Char] -> [Char]) -> Show a
$cshowsPrec :: Int -> IntegerToByteStringError -> [Char] -> [Char]
showsPrec :: Int -> IntegerToByteStringError -> [Char] -> [Char]
$cshow :: IntegerToByteStringError -> [Char]
show :: IntegerToByteStringError -> [Char]
$cshowList :: [IntegerToByteStringError] -> [Char] -> [Char]
showList :: [IntegerToByteStringError] -> [Char] -> [Char]
Show)

endiannessArgToByteOrder :: Bool -> ByteOrder
endiannessArgToByteOrder :: Bool -> ByteOrder
endiannessArgToByteOrder Bool
b = if Bool
b then ByteOrder
BigEndian else ByteOrder
LittleEndian

-- For performance and clarity, the endianness argument uses
-- 'ByteOrder', and the length argument is an 'Int'.
-- This may not actually be unsafe, but it shouldn't be used outside this module.
unsafeIntegerToByteString :: ByteOrder -> Int -> Integer -> Either IntegerToByteStringError ByteString
unsafeIntegerToByteString :: ByteOrder
-> Int -> Integer -> Either IntegerToByteStringError ByteString
unsafeIntegerToByteString ByteOrder
requestedByteOrder Int
requestedLength Integer
input
  | Integer
input Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
0 = IntegerToByteStringError
-> Either IntegerToByteStringError ByteString
forall a b. a -> Either a b
Left IntegerToByteStringError
NegativeInput
  | Integer
input Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = ByteString -> Either IntegerToByteStringError ByteString
forall a b. b -> Either a b
Right (ByteString -> Either IntegerToByteStringError ByteString)
-> (Word8 -> ByteString)
-> Word8
-> Either IntegerToByteStringError ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8 -> ByteString
BS.replicate Int
requestedLength (Word8 -> Either IntegerToByteStringError ByteString)
-> Word8 -> Either IntegerToByteStringError ByteString
forall a b. (a -> b) -> a -> b
$ Word8
0x00
  -- We use manual specialization to ensure as few branches in loop bodies as
  -- we can. See Note [Manual specialization] for details.
  | Int
requestedLength Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = ByteString -> Either IntegerToByteStringError ByteString
forall a b. b -> Either a b
Right (ByteString -> Either IntegerToByteStringError ByteString)
-> (Builder -> ByteString)
-> Builder
-> Either IntegerToByteStringError ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
Builder.builderBytes (Builder -> Either IntegerToByteStringError ByteString)
-> Builder -> Either IntegerToByteStringError ByteString
forall a b. (a -> b) -> a -> b
$ case ByteOrder
requestedByteOrder of
      ByteOrder
LittleEndian -> Builder -> Integer -> Builder
goLENoLimit Builder
forall a. Monoid a => a
mempty Integer
input
      ByteOrder
BigEndian    -> Builder -> Integer -> Builder
goBENoLimit Builder
forall a. Monoid a => a
mempty Integer
input
  | Bool
otherwise = do
      let result :: Maybe Builder
result = case ByteOrder
requestedByteOrder of
                    ByteOrder
LittleEndian -> Builder -> Integer -> Maybe Builder
goLELimit Builder
forall a. Monoid a => a
mempty Integer
input
                    ByteOrder
BigEndian    -> Builder -> Integer -> Maybe Builder
goBELimit Builder
forall a. Monoid a => a
mempty Integer
input
      case Maybe Builder
result of
        Maybe Builder
Nothing -> IntegerToByteStringError
-> Either IntegerToByteStringError ByteString
forall a b. a -> Either a b
Left IntegerToByteStringError
NotEnoughDigits
        Just Builder
b  -> ByteString -> Either IntegerToByteStringError ByteString
forall a b. b -> Either a b
Right (ByteString -> Either IntegerToByteStringError ByteString)
-> (Builder -> ByteString)
-> Builder
-> Either IntegerToByteStringError ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
Builder.builderBytes (Builder -> Either IntegerToByteStringError ByteString)
-> Builder -> Either IntegerToByteStringError ByteString
forall a b. (a -> b) -> a -> b
$ Builder
b
  where
    goLELimit :: Builder -> Integer -> Maybe Builder
    goLELimit :: Builder -> Integer -> Maybe Builder
goLELimit Builder
acc Integer
remaining
      | Integer
remaining Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = Builder -> Maybe Builder
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Maybe Builder) -> Builder -> Maybe Builder
forall a b. (a -> b) -> a -> b
$ Builder -> Builder
padLE Builder
acc
      | Bool
otherwise = do
          -- builderLength is constant time, so we don't track the length ourselves
          Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Builder -> Int
Builder.builderLength Builder
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
requestedLength)
          -- This allows extracting eight digits at once. See Note [Loop sectioning] for details on
          -- why we do this. We also duplicate this code in several places: see Note [Manual
          -- specialization] for why.
          --
          -- The code is basically equivalent to remaining `quotRem` 2^64, but more efficient. This
          -- is for two reasons: firstly, GHC does not optimize divisions into shifts for Integer
          -- (even if the divisor is constant), and secondly, the pair generated by `quotRem` costs
          -- us as much as 15% peformance, and GHC seems unable to eliminate it. Thus, we have to do
          -- it like this instead.
          let newRemaining :: Integer
newRemaining = Integer
remaining Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
64
          -- Given that remaining must be non-negative, fromInteger here effectively truncates to a
          -- Word64, by retaining only the least-significant 8 bytes.
          let Word64
digitGroup :: Word64 = Integer -> Word64
forall a. Num a => Integer -> a
fromInteger Integer
remaining
          case Integer
newRemaining of
            Integer
0 -> Builder -> Word64 -> Maybe Builder
finishLELimit Builder
acc Word64
digitGroup
            Integer
_ -> Builder -> Integer -> Maybe Builder
goLELimit (Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word64 -> Builder
forall a. Storable a => a -> Builder
Builder.storable Word64
digitGroup) Integer
newRemaining
    finishLELimit :: Builder -> Word64 -> Maybe Builder
    finishLELimit :: Builder -> Word64 -> Maybe Builder
finishLELimit Builder
acc Word64
remaining
      | Word64
remaining Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0 = Builder -> Maybe Builder
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Maybe Builder) -> Builder -> Maybe Builder
forall a b. (a -> b) -> a -> b
$ Builder -> Builder
padLE Builder
acc
      | Bool
otherwise = do
          Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Builder -> Int
Builder.builderLength Builder
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
requestedLength)
          -- This is equivalent to 'remaining `quotRem` 256' followed by a conversion of the
          -- remainder, but faster. This is similar to the larger example above, and we do it for
          -- the same reasons.
          let newRemaining :: Word64
newRemaining = Word64
remaining Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
8
          let Word8
digit :: Word8 = Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
remaining
          Builder -> Word64 -> Maybe Builder
finishLELimit (Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word8 -> Builder
Builder.word8 Word8
digit) Word64
newRemaining
    -- By separating the case where we don't need to concern ourselves with a
    -- user-specified limit, we can avoid branching needlessly, or doing a
    -- complex expression check on every loop. See Note [Manual specialization]
    -- for why this matters.
    goLENoLimit :: Builder -> Integer -> Builder
    goLENoLimit :: Builder -> Integer -> Builder
goLENoLimit Builder
acc Integer
remaining
      | Integer
remaining Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = Builder
acc
      | Bool
otherwise = let newRemaining :: Integer
newRemaining = Integer
remaining Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
64
                        Word64
digitGroup :: Word64 = Integer -> Word64
forall a. Num a => Integer -> a
fromInteger Integer
remaining
                      in case Integer
newRemaining of
                        Integer
0 -> Builder -> Word64 -> Builder
finishLENoLimit Builder
acc Word64
digitGroup
                        Integer
_ -> Builder -> Integer -> Builder
goLENoLimit (Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word64 -> Builder
forall a. Storable a => a -> Builder
Builder.storable Word64
digitGroup) Integer
newRemaining
    finishLENoLimit :: Builder -> Word64 -> Builder
    finishLENoLimit :: Builder -> Word64 -> Builder
finishLENoLimit Builder
acc Word64
remaining
      | Word64
remaining Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0 = Builder
acc
      | Bool
otherwise =
          let newRemaining :: Word64
newRemaining = Word64
remaining Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
8
              Word8
digit :: Word8 = Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
remaining
           in Builder -> Word64 -> Builder
finishLENoLimit (Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word8 -> Builder
Builder.word8 Word8
digit) Word64
newRemaining
    padLE :: Builder -> Builder
    padLE :: Builder -> Builder
padLE Builder
acc = let paddingLength :: Int
paddingLength = Int
requestedLength Int -> Int -> Int
forall a. Num a => a -> a -> a
- Builder -> Int
Builder.builderLength Builder
acc
      in Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
Builder.bytes (Int -> Word8 -> ByteString
BS.replicate Int
paddingLength Word8
0x0)
    -- We manually specialize the big-endian case: see Note [Manual specialization] for why.
    goBELimit :: Builder -> Integer -> Maybe Builder
    goBELimit :: Builder -> Integer -> Maybe Builder
goBELimit Builder
acc Integer
remaining
      | Integer
remaining Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = Builder -> Maybe Builder
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Maybe Builder) -> Builder -> Maybe Builder
forall a b. (a -> b) -> a -> b
$ Builder -> Builder
padBE Builder
acc
      | Bool
otherwise = do
          Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Builder -> Int
Builder.builderLength Builder
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
requestedLength)
          let newRemaining :: Integer
newRemaining = Integer
remaining Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
64
          let Word64
digitGroup :: Word64 = Integer -> Word64
forall a. Num a => Integer -> a
fromInteger Integer
remaining
          case Integer
newRemaining of
            Integer
0 -> Builder -> Word64 -> Maybe Builder
finishBELimit Builder
acc Word64
digitGroup
            Integer
_ -> Builder -> Integer -> Maybe Builder
goBELimit (Word64 -> Builder
Builder.word64BE Word64
digitGroup Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
acc) Integer
newRemaining
    finishBELimit :: Builder -> Word64 -> Maybe Builder
    finishBELimit :: Builder -> Word64 -> Maybe Builder
finishBELimit Builder
acc Word64
remaining
      | Word64
remaining Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0 = Builder -> Maybe Builder
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Builder -> Maybe Builder) -> Builder -> Maybe Builder
forall a b. (a -> b) -> a -> b
$ Builder -> Builder
padBE Builder
acc
      | Bool
otherwise = do
          Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Builder -> Int
Builder.builderLength Builder
acc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
requestedLength)
          let newRemaining :: Word64
newRemaining = Word64
remaining Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
8
          let digit :: Word8
digit = Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
remaining
          Builder -> Word64 -> Maybe Builder
finishBELimit (Word8 -> Builder
Builder.word8 Word8
digit Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
acc) Word64
newRemaining
    goBENoLimit :: Builder -> Integer -> Builder
    goBENoLimit :: Builder -> Integer -> Builder
goBENoLimit Builder
acc Integer
remaining
      | Integer
remaining Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = Builder
acc
      | Bool
otherwise = let newRemaining :: Integer
newRemaining = Integer
remaining Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
64
                        digitGroup :: Word64
digitGroup = Integer -> Word64
forall a. Num a => Integer -> a
fromInteger Integer
remaining
                      in case Integer
newRemaining of
                        Integer
0 -> Builder -> Word64 -> Builder
finishBENoLimit Builder
acc Word64
digitGroup
                        Integer
_ -> Builder -> Integer -> Builder
goBENoLimit (Word64 -> Builder
Builder.word64BE Word64
digitGroup Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
acc) Integer
newRemaining
    finishBENoLimit :: Builder -> Word64 -> Builder
    finishBENoLimit :: Builder -> Word64 -> Builder
finishBENoLimit Builder
acc Word64
remaining
      | Word64
remaining Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0 = Builder
acc
      | Bool
otherwise = let newRemaining :: Word64
newRemaining = Word64
remaining Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
8
                        digit :: Word8
digit = Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
remaining
                      in Builder -> Word64 -> Builder
finishBENoLimit (Word8 -> Builder
Builder.word8 Word8
digit Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
acc) Word64
newRemaining
    padBE :: Builder -> Builder
    padBE :: Builder -> Builder
padBE Builder
acc = let paddingLength :: Int
paddingLength = Int
requestedLength Int -> Int -> Int
forall a. Num a => a -> a -> a
- Builder -> Int
Builder.builderLength Builder
acc in
      ByteString -> Builder
Builder.bytes (Int -> Word8 -> ByteString
BS.replicate Int
paddingLength Word8
0x0) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
acc

-- | Conversion from 'ByteString' to 'Integer', as per
-- [CIP-121](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0121).

-- | Wrapper for 'unsafeByteStringToInteger' to make it more convenient to define as a builtin.
byteStringToInteger ::
  Bool -> ByteString -> Integer
byteStringToInteger :: Bool -> ByteString -> Integer
byteStringToInteger Bool
statedEndiannessArg ByteString
input =
  let endianness :: ByteOrder
endianness = Bool -> ByteOrder
endiannessArgToByteOrder Bool
statedEndiannessArg in
    ByteOrder -> ByteString -> Integer
unsafeByteStringToInteger ByteOrder
endianness ByteString
input

-- For clarity, the stated endianness argument uses 'ByteOrder'.
-- This function may not actually be unsafe, but it shouldn't be used outside this module.
unsafeByteStringToInteger :: ByteOrder -> ByteString -> Integer
  -- We use manual specialization to ensure as few branches in loop bodies as we can. See Note
  -- [Manual specialization] for details.
unsafeByteStringToInteger :: ByteOrder -> ByteString -> Integer
unsafeByteStringToInteger ByteOrder
statedByteOrder ByteString
input = case ByteOrder
statedByteOrder of
    -- Since padding bytes in the most-significant-last representation go at
    -- the end of the input, we can skip decoding them, as they won't affect
    -- the result in any way.
    ByteOrder
LittleEndian -> case (Word8 -> Bool) -> ByteString -> Maybe Int
BS.findIndexEnd (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x00) ByteString
input of
      -- If there are no nonzero bytes, it must be zero.
      Maybe Int
Nothing  -> Integer
0
      Just Int
end -> Integer -> Int -> Int -> Integer
goLE Integer
0 Int
end Int
0
    -- Since padding bytes in the most-significant-first representation go at
    -- the beginning of the input, we can skip decoding them, as they won't
    -- affect the result in any way.
    ByteOrder
BigEndian -> case (Word8 -> Bool) -> ByteString -> Maybe Int
BS.findIndex (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x00) ByteString
input of
      Maybe Int
Nothing  -> Integer
0
      Just Int
end -> Integer -> Int -> Int -> Int -> Integer
goBE Integer
0 Int
end Int
0 (ByteString -> Int
BS.length ByteString
input Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
  where
    -- Like with toByteString, we use loop sectioning to decode eight digits at once. See Note [Loop
    -- sectioning] for why we do this.
    goLE :: Integer -> Int -> Int -> Integer
    goLE :: Integer -> Int -> Int -> Integer
goLE Integer
acc Int
limit Int
ix
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= (Int
limit Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
7) =
          let digitGroup :: Word64
digitGroup = Int -> Word64
read64LE Int
ix
              -- Same as ix * 8, but faster. GHC might already do this optimization, but we may as
              -- well be sure.
              shift :: Int
shift = Int
ix Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
3
              newIx :: Int
newIx = Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8
              -- We use unsafeShiftL to move a group of eight digits into the right position in
              -- the result, then combine with the accumulator. This is equivalent to a
              -- multiplication by 2^64*k, but significantly faster, as GHC doesn't optimize
              -- such multiplications into shifts for Integers.
              newAcc :: Integer
newAcc = Integer
acc Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Word64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
digitGroup Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
shift
            in Integer -> Int -> Int -> Integer
goLE Integer
newAcc Int
limit Int
newIx
      | Bool
otherwise = Integer -> Int -> Int -> Integer
finishLE Integer
acc Int
limit Int
ix
    finishLE :: Integer -> Int -> Int -> Integer
    finishLE :: Integer -> Int -> Int -> Integer
finishLE Integer
acc Int
limit Int
ix
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
limit = Integer
acc
      | Bool
otherwise =
          let digit :: Word8
digit = HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input Int
ix
              shift :: Int
shift = Int
ix Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
3
              newIx :: Int
newIx = Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
              -- Similarly to before, we use unsafeShiftL to move a single digit into the right
              -- position in the result.
              newAcc :: Integer
newAcc = Integer
acc Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Word8 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
digit Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
shift
            in Integer -> Int -> Int -> Integer
finishLE Integer
newAcc Int
limit Int
newIx
    -- Technically, ByteString does not allow reading of anything bigger than a single byte.
    -- However, because ByteStrings are counted arrays, caching already brings in adjacent bytes,
    -- which makes fetching them quite cheap. Additionally, GHC appears to optimize this into a
    -- block read of 64 bits at once, which saves memory movement. See Note [Superscalarity and
    -- caching] for details of why this matters.
    read64LE :: Int -> Word64
    read64LE :: Int -> Word64
read64LE Int
startIx =
      Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input Int
startIx)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
8)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
16)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
24)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
4)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
32)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
5)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
40)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
6)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
48)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
startIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
56)
    -- We manually specialize the big-endian cases: see Note [Manual specialization] for why.
    --
    -- In the big-endian case, shifts and indexes change in different ways: indexes start _high_
    -- and _reduce_, but shifts start _low_ and rise. This is different to the little-endian case,
    -- where both start low and rise. Thus, we track the index and shift separately in the
    -- big-endian case: it makes the adjustments easier, and doesn't really change anything, as if
    -- we wanted to compute the shift, we'd have to pass an offset argument anyway.
    goBE :: Integer -> Int -> Int -> Int -> Integer
    goBE :: Integer -> Int -> Int -> Int -> Integer
goBE Integer
acc Int
limit Int
shift Int
ix
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= (Int
limit Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) =
          let digitGroup :: Word64
digitGroup = Int -> Word64
read64BE Int
ix
              newShift :: Int
newShift = Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
64
              newIx :: Int
newIx = Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
8
              newAcc :: Integer
newAcc = Integer
acc Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Word64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
digitGroup Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
shift
           in Integer -> Int -> Int -> Int -> Integer
goBE Integer
newAcc Int
limit Int
newShift Int
newIx
      | Bool
otherwise = Integer -> Int -> Int -> Int -> Integer
finishBE Integer
acc Int
limit Int
shift Int
ix
    finishBE :: Integer -> Int -> Int -> Int -> Integer
    finishBE :: Integer -> Int -> Int -> Int -> Integer
finishBE Integer
acc Int
limit Int
shift Int
ix
      | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
limit = Integer
acc
      | Bool
otherwise =
          let digit :: Word8
digit = HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input Int
ix
              newShift :: Int
newShift = Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8
              newIx :: Int
newIx = Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
              newAcc :: Integer
newAcc = Integer
acc Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Word8 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
digit Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
shift
           in Integer -> Int -> Int -> Int -> Integer
finishBE Integer
newAcc Int
limit Int
newShift Int
newIx
    read64BE :: Int -> Word64
    read64BE :: Int -> Word64
read64BE Int
endIx =
      Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input Int
endIx)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
8)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
16)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
3)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
24)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
4)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
32)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
5)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
40)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
6)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
48)
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
input (Int
endIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
7)) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
56)

{- Note [Binary bitwise operation implementation and manual specialization]

   All of the 'binary' bitwise operations (namely `andByteString`,
   `orByteString` and `xorByteString`) operate similarly:

   1. Decide which of their two `ByteString` arguments determines the length
      of the result. For padding semantics, this is the _longer_ argument,
      whereas for truncation semantics, it's the _shorter_ one. If both
      `ByteString` arguments have identical length, it doesn't matter which we
      choose.
   2. Copy the choice made in step 1 into a fresh mutable buffer.
   3. Traverse over each byte of the argument _not_ chosen in step 1, and
      combine each of those bytes with the byte at the corresponding index of
      the fresh mutable buffer from step 2 (`.&.` for `andByteString`,
      `.|.` for `orByteString`, `xor` for `xorByteString`).

  We also make use of loop sectioning to optimize this operation: see Note
  [Loop sectioning] explaining why we do this. Fundamentally, this doesn't
  change the logic of the operation, but means that step 3 is split into
  two smaller sub-steps: we first word 8 bytes at a time, then one byte at a
  time to finish up if necessary. Other than the choice of 'combining
  operation', the structure of the computation is the same, which suggests that
  we want a helper function with a signature like

  helper1 ::
    (Word64 -> Word64 -> Word64) ->
    (Word8 -> Word8 -> Word8) ->
    ByteString ->
    ByteString ->
    Int ->
    ByteString

  or possibly (to avoid duplicate argument passing) like

  helper2 ::
    (forall (a :: Type) . Bits a => a -> a -> a) ->
    ByteString ->
    ByteString ->
    Int ->
    ByteString

  This would allow us to share all this logic, and have each of the 'top-level'
  operations just dispatch to either of the helpers with the appropriate
  function argument(s). Instead, we chose to write a manual copy of this logic
  for each of the 'top-level' operations, substituting only the 'combining
  operation'.

  We made this choice as any design based on either `helper1` or `helper2` is
  significantly slower (at least 50% worse, and the penalty _percentage_ grows
  with argument size). While `helper2` is significantly more penalizing than
  `helper1`, even `helper1` reaches an almost threefold slowdown at the higher
  input sizes we are interested in relative the manual version we use here.
  Due to the 'low-level' nature of Plutus Core primops, we consider these costs
  unacceptable relative the (small) benefits to code clarity and maintainability
  any solution using either style of helper would provide.

  The reason for `helper2` under-performing is unsurprising: any argument whose
  type is rank-2 polymorphic with a dictionary constraint essentially acts as
  a 'program template', which gets interpreted at runtime given some dictionary
  for a `Bits` instance. GHC can do practically nothing to optimize this, as
  there is no way to tell, for any given argument, _which_ definitions of an
  instance would be required here, even if the set of operations we use is
  finite, since any instance can make use of the full power of Haskell, which
  essentially lands us in Rice's Theorem territory. For `helper1`, the reasons
  are similar: it _must_ be able to work regardless of what functions (assuming
  appropriate types) it is given, which means in general, GHC is forced to
  compile mother-may-I-style code involving pointer chasing those arguments at
  runtime. This explains why the 'blowup' becomes worse with argument length.

  While in theory inlining could help with at least the `helper1` case (
  `helper2` is beyond that technique), it doesn't seem like GHC is able to
  figure this out, even with `INLINE` is placed on `helper1`.
  -}

-- | Bitwise logical AND, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122).
{-# INLINEABLE andByteString #-}
andByteString :: Bool -> ByteString -> ByteString -> ByteString
andByteString :: Bool -> ByteString -> ByteString -> ByteString
andByteString Bool
shouldPad ByteString
bs1 ByteString
bs2 =
  let (ByteString
shorter, ByteString
longer) = if ByteString -> Int
BS.length ByteString
bs1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
BS.length ByteString
bs2 then (ByteString
bs1, ByteString
bs2) else (ByteString
bs2, ByteString
bs1)
      (ByteString
toCopy, ByteString
toTraverse) = if Bool
shouldPad then (ByteString
longer, ByteString
shorter) else (ByteString
shorter, ByteString
longer)
   in ByteString -> ByteString -> Int -> ByteString
go ByteString
toCopy ByteString
toTraverse (ByteString -> Int
BS.length ByteString
shorter)
  where
    go :: ByteString -> ByteString -> Int -> ByteString
    go :: ByteString -> ByteString -> Int -> ByteString
go ByteString
toCopy ByteString
toTraverse Int
traverseLen =
      IO ByteString -> ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO ByteString -> ByteString)
-> ((CStringLen -> IO ByteString) -> IO ByteString)
-> (CStringLen -> IO ByteString)
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (CStringLen -> IO ByteString) -> IO ByteString
forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
toCopy ((CStringLen -> IO ByteString) -> ByteString)
-> (CStringLen -> IO ByteString) -> ByteString
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
copyPtr, Int
copyLen) ->
        ByteString -> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
toTraverse ((Ptr CChar -> IO ByteString) -> IO ByteString)
-> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
traversePtr -> do
          Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
copyLen ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dstPtr -> do
            Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
dstPtr (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
copyPtr) Int
copyLen
            let (Int
bigStrides, Int
littleStrides) = Int
traverseLen Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
            let offset :: Int
offset = Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8
            let Ptr Word64
bigDstPtr :: Ptr Word64 = Ptr Word8 -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
dstPtr
            let Ptr Word64
bigTraversePtr :: Ptr Word64 = Ptr CChar -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
traversePtr
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
              Word64
w64_1 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigDstPtr Int
i
              Word64
w64_2 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigTraversePtr Int
i
              Ptr Word64 -> Int -> Word64 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word64
bigDstPtr Int
i (Word64 -> IO ()) -> Word64 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word64
w64_1 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
Bits..&. Word64
w64_2
            let Ptr Word8
smallDstPtr :: Ptr Word8 = Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
dstPtr Int
offset
            let Ptr Word8
smallTraversePtr :: Ptr Word8 = Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
traversePtr Int
offset
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
littleStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
              Word8
w8_1 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallDstPtr Int
i
              Word8
w8_2 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallTraversePtr Int
i
              Ptr Word8 -> Int -> Word8 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word8
smallDstPtr Int
i (Word8 -> IO ()) -> Word8 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word8
w8_1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..&. Word8
w8_2

-- | Bitwise logical OR, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122).
{-# INLINEABLE orByteString #-}
orByteString :: Bool -> ByteString -> ByteString -> ByteString
orByteString :: Bool -> ByteString -> ByteString -> ByteString
orByteString Bool
shouldPad ByteString
bs1 ByteString
bs2 =
  let (ByteString
shorter, ByteString
longer) = if ByteString -> Int
BS.length ByteString
bs1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
BS.length ByteString
bs2 then (ByteString
bs1, ByteString
bs2) else (ByteString
bs2, ByteString
bs1)
      (ByteString
toCopy, ByteString
toTraverse) = if Bool
shouldPad then (ByteString
longer, ByteString
shorter) else (ByteString
shorter, ByteString
longer)
   in ByteString -> ByteString -> Int -> ByteString
go ByteString
toCopy ByteString
toTraverse (ByteString -> Int
BS.length ByteString
shorter)
  where
    go :: ByteString -> ByteString -> Int -> ByteString
    go :: ByteString -> ByteString -> Int -> ByteString
go ByteString
toCopy ByteString
toTraverse Int
traverseLen =
      IO ByteString -> ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO ByteString -> ByteString)
-> ((CStringLen -> IO ByteString) -> IO ByteString)
-> (CStringLen -> IO ByteString)
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (CStringLen -> IO ByteString) -> IO ByteString
forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
toCopy ((CStringLen -> IO ByteString) -> ByteString)
-> (CStringLen -> IO ByteString) -> ByteString
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
copyPtr, Int
copyLen) ->
        ByteString -> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
toTraverse ((Ptr CChar -> IO ByteString) -> IO ByteString)
-> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
traversePtr -> do
          Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
copyLen ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dstPtr -> do
            Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
dstPtr (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
copyPtr) Int
copyLen
            let (Int
bigStrides, Int
littleStrides) = Int
traverseLen Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
            let offset :: Int
offset = Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8
            let Ptr Word64
bigDstPtr :: Ptr Word64 = Ptr Word8 -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
dstPtr
            let Ptr Word64
bigTraversePtr :: Ptr Word64 = Ptr CChar -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
traversePtr
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
              Word64
w64_1 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigDstPtr Int
i
              Word64
w64_2 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigTraversePtr Int
i
              Ptr Word64 -> Int -> Word64 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word64
bigDstPtr Int
i (Word64 -> IO ()) -> Word64 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word64
w64_1 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
Bits..|. Word64
w64_2
            let Ptr Word8
smallDstPtr :: Ptr Word8 = Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
dstPtr Int
offset
            let Ptr Word8
smallTraversePtr :: Ptr Word8 = Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
traversePtr Int
offset
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
littleStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
              Word8
w8_1 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallDstPtr Int
i
              Word8
w8_2 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallTraversePtr Int
i
              Ptr Word8 -> Int -> Word8 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word8
smallDstPtr Int
i (Word8 -> IO ()) -> Word8 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word8
w8_1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..|. Word8
w8_2

-- | Bitwise logical XOR, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122).
{-# INLINEABLE xorByteString #-}
xorByteString :: Bool -> ByteString -> ByteString -> ByteString
xorByteString :: Bool -> ByteString -> ByteString -> ByteString
xorByteString Bool
shouldPad ByteString
bs1 ByteString
bs2 =
  let (ByteString
shorter, ByteString
longer) = if ByteString -> Int
BS.length ByteString
bs1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
BS.length ByteString
bs2 then (ByteString
bs1, ByteString
bs2) else (ByteString
bs2, ByteString
bs1)
      (ByteString
toCopy, ByteString
toTraverse) = if Bool
shouldPad then (ByteString
longer, ByteString
shorter) else (ByteString
shorter, ByteString
longer)
   in ByteString -> ByteString -> Int -> ByteString
go ByteString
toCopy ByteString
toTraverse (ByteString -> Int
BS.length ByteString
shorter)
  where
    go :: ByteString -> ByteString -> Int -> ByteString
    go :: ByteString -> ByteString -> Int -> ByteString
go ByteString
toCopy ByteString
toTraverse Int
traverseLen =
      IO ByteString -> ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO ByteString -> ByteString)
-> ((CStringLen -> IO ByteString) -> IO ByteString)
-> (CStringLen -> IO ByteString)
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (CStringLen -> IO ByteString) -> IO ByteString
forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
toCopy ((CStringLen -> IO ByteString) -> ByteString)
-> (CStringLen -> IO ByteString) -> ByteString
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
copyPtr, Int
copyLen) ->
        ByteString -> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
toTraverse ((Ptr CChar -> IO ByteString) -> IO ByteString)
-> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
traversePtr -> do
          Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
copyLen ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dstPtr -> do
            Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
dstPtr (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
copyPtr) Int
copyLen
            let (Int
bigStrides, Int
littleStrides) = Int
traverseLen Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
            let offset :: Int
offset = Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8
            let Ptr Word64
bigDstPtr :: Ptr Word64 = Ptr Word8 -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
dstPtr
            let Ptr Word64
bigTraversePtr :: Ptr Word64 = Ptr CChar -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
traversePtr
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
              Word64
w64_1 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigDstPtr Int
i
              Word64
w64_2 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigTraversePtr Int
i
              Ptr Word64 -> Int -> Word64 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word64
bigDstPtr Int
i (Word64 -> IO ()) -> Word64 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
Bits.xor Word64
w64_1 Word64
w64_2
            let Ptr Word8
smallDstPtr :: Ptr Word8 = Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
dstPtr Int
offset
            let Ptr Word8
smallTraversePtr :: Ptr Word8 = Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
traversePtr Int
offset
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
littleStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
              Word8
w8_1 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallDstPtr Int
i
              Word8
w8_2 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallTraversePtr Int
i
              Ptr Word8 -> Int -> Word8 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word8
smallDstPtr Int
i (Word8 -> IO ()) -> Word8 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits.xor Word8
w8_1 Word8
w8_2

-- | Bitwise logical complement, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122).
{-# INLINEABLE complementByteString #-}
complementByteString :: ByteString -> ByteString
complementByteString :: ByteString -> ByteString
complementByteString ByteString
bs = IO ByteString -> ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO ByteString -> ByteString)
-> ((CStringLen -> IO ByteString) -> IO ByteString)
-> (CStringLen -> IO ByteString)
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (CStringLen -> IO ByteString) -> IO ByteString
forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
bs ((CStringLen -> IO ByteString) -> ByteString)
-> (CStringLen -> IO ByteString) -> ByteString
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
srcPtr, Int
len) -> do
  -- We use loop sectioning here; see Note [Loop sectioning] as to why we do this
  let (Int
bigStrides, Int
littleStrides) = Int
len Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
  let offset :: Int
offset = Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8
  Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
len ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dstPtr -> do
    let Ptr Word64
bigSrcPtr :: Ptr Word64 = Ptr CChar -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr
    let Ptr Word64
bigDstPtr :: Ptr Word64 = Ptr Word8 -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
dstPtr
    [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
      Word64
w64 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigSrcPtr Int
i
      Ptr Word64 -> Int -> Word64 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word64
bigDstPtr Int
i (Word64 -> IO ()) -> (Word64 -> Word64) -> Word64 -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word64 -> Word64
forall a. Bits a => a -> a
Bits.complement (Word64 -> IO ()) -> Word64 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word64
w64
    let Ptr Word8
smallSrcPtr :: Ptr Word8 = Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
srcPtr Int
offset
    let Ptr Word8
smallDstPtr :: Ptr Word8 = Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
dstPtr Int
offset
    [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0 .. Int
littleStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
      Word8
w8 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallSrcPtr Int
i
      Ptr Word8 -> Int -> Word8 -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr Word8
smallDstPtr Int
i (Word8 -> IO ()) -> (Word8 -> Word8) -> Word8 -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Word8
forall a. Bits a => a -> a
Bits.complement (Word8 -> IO ()) -> Word8 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word8
w8

-- | Bit read at index, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122)
{-# INLINEABLE readBit #-}
readBit :: ByteString -> Int -> BuiltinResult Bool
readBit :: ByteString -> Int -> BuiltinResult Bool
readBit ByteString
bs Int
ix
  | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = do
      Text -> BuiltinResult ()
emit Text
"readBit: index out of bounds"
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Index: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Int -> [Char]) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Char]
forall a. Show a => a -> [Char]
show (Int -> Text) -> Int -> Text
forall a b. (a -> b) -> a -> b
$ Int
ix)
      BuiltinResult Bool
forall err. AsEvaluationFailure err => err
evaluationFailure
  | Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 = do
      Text -> BuiltinResult ()
emit Text
"readBit: index out of bounds"
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Index: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Int -> [Char]) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Char]
forall a. Show a => a -> [Char]
show (Int -> Text) -> Int -> Text
forall a b. (a -> b) -> a -> b
$ Int
ix)
      BuiltinResult Bool
forall err. AsEvaluationFailure err => err
evaluationFailure
  | Bool
otherwise = do
      let (Int
bigIx, Int
littleIx) = Int
ix Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
      let flipIx :: Int
flipIx = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bigIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
      Bool -> BuiltinResult Bool
forall a. a -> BuiltinResult a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Bool -> BuiltinResult Bool) -> Bool -> BuiltinResult Bool
forall a b. (a -> b) -> a -> b
$ Word8 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
Bits.testBit (HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
bs Int
flipIx) Int
littleIx
  where
    len :: Int
    len :: Int
len = ByteString -> Int
BS.length ByteString
bs

-- | Bulk bit write, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122)
{-# INLINEABLE writeBits #-}
writeBits :: ByteString -> [Integer] -> Bool -> BuiltinResult ByteString
writeBits :: ByteString -> [Integer] -> Bool -> BuiltinResult ByteString
writeBits ByteString
bs [Integer]
ixs Bool
bit = case IO (Either WriteBitsException ByteString)
-> Either WriteBitsException ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO (Either WriteBitsException ByteString)
 -> Either WriteBitsException ByteString)
-> (IO ByteString -> IO (Either WriteBitsException ByteString))
-> IO ByteString
-> Either WriteBitsException ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO ByteString -> IO (Either WriteBitsException ByteString)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO ByteString -> Either WriteBitsException ByteString)
-> IO ByteString -> Either WriteBitsException ByteString
forall a b. (a -> b) -> a -> b
$ IO ByteString
go of
  Left (WriteBitsException Integer
i) -> do
    Text -> BuiltinResult ()
emit Text
"writeBits: index out of bounds"
    Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Index: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
i)
    BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
  Right ByteString
result -> ByteString -> BuiltinResult ByteString
forall a. a -> BuiltinResult a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
result
  where
    -- This is written in a somewhat strange way. See Note [writeBits and
    -- exceptions], which covers why we did this.
    go :: IO ByteString
    go :: IO ByteString
go = ByteString -> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
bs ((Ptr CChar -> IO ByteString) -> IO ByteString)
-> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
srcPtr ->
          Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
len ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$
            \Ptr Word8
dstPtr ->
              let go2 :: [Integer] -> IO ()
go2 (Integer
i:[Integer]
is) = Ptr Word8 -> Integer -> IO ()
setOrClearAtIx Ptr Word8
dstPtr Integer
i IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [Integer] -> IO ()
go2 [Integer]
is
                  go2 [Integer]
_      = () -> IO ()
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
              in do
                Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
dstPtr (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr) Int
len
                [Integer] -> IO ()
go2 [Integer]
ixs
    len :: Int
    len :: Int
len = ByteString -> Int
BS.length ByteString
bs
    bitLen :: Integer
    bitLen :: Integer
bitLen = Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
8
    setOrClearAtIx :: Ptr Word8 -> Integer -> IO ()
    setOrClearAtIx :: Ptr Word8 -> Integer -> IO ()
setOrClearAtIx Ptr Word8
ptr Integer
i
      | Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
0 = WriteBitsException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (WriteBitsException -> IO ()) -> WriteBitsException -> IO ()
forall a b. (a -> b) -> a -> b
$ Integer -> WriteBitsException
WriteBitsException Integer
i
      | Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
>= Integer
bitLen = WriteBitsException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (WriteBitsException -> IO ()) -> WriteBitsException -> IO ()
forall a b. (a -> b) -> a -> b
$ Integer -> WriteBitsException
WriteBitsException Integer
i
      | Bool
otherwise = do
          let (Integer
bigIx, Integer
littleIx) = Integer
i Integer -> Integer -> (Integer, Integer)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Integer
8
          let flipIx :: Int
flipIx = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
bigIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
          Word8
w8 :: Word8 <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
ptr Int
flipIx
          let toWrite :: Word8
toWrite = if Bool
bit
                        then Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
Bits.setBit Word8
w8 (Int -> Word8) -> (Integer -> Int) -> Integer -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Word8) -> Integer -> Word8
forall a b. (a -> b) -> a -> b
$ Integer
littleIx
                        else Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
Bits.clearBit Word8
w8 (Int -> Word8) -> (Integer -> Int) -> Integer -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Word8) -> Integer -> Word8
forall a b. (a -> b) -> a -> b
$ Integer
littleIx
          Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
ptr Int
flipIx Word8
toWrite
    {-# INLINEABLE setOrClearAtIx #-}

-- | Byte replication, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122)
-- We want to cautious about the allocation of huge amounts of memory so we
-- impose the same length limit that's used in integerToByteString.
replicateByte :: Integer -> Word8 -> BuiltinResult ByteString
replicateByte :: Integer -> Word8 -> BuiltinResult ByteString
replicateByte Integer
len Word8
w8
  | Integer
len Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
0 = do
      Text -> BuiltinResult ()
emit Text
"replicateByte: negative length requested"
      BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
  | Integer
len Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
maximumOutputLength = do
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ())
-> ([Char] -> Text) -> [Char] -> BuiltinResult ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Text
pack ([Char] -> BuiltinResult ()) -> [Char] -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ [Char]
"replicateByte: requested length is too long (maximum is "
               [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Integer -> [Char]
forall a. Show a => a -> [Char]
show Integer
maximumOutputLength
               [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" bytes)"
      Text -> BuiltinResult ()
emit (Text -> BuiltinResult ()) -> Text -> BuiltinResult ()
forall a b. (a -> b) -> a -> b
$ Text
"Length requested: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ([Char] -> Text
pack ([Char] -> Text) -> (Integer -> [Char]) -> Integer -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> [Char]
forall a. Show a => a -> [Char]
show (Integer -> Text) -> Integer -> Text
forall a b. (a -> b) -> a -> b
$ Integer
len)
      BuiltinResult ByteString
forall err. AsEvaluationFailure err => err
evaluationFailure
  | Bool
otherwise = ByteString -> BuiltinResult ByteString
forall a. a -> BuiltinResult a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteString -> BuiltinResult ByteString)
-> (Word8 -> ByteString) -> Word8 -> BuiltinResult ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8 -> ByteString
BS.replicate (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
len) (Word8 -> BuiltinResult ByteString)
-> Word8 -> BuiltinResult ByteString
forall a b. (a -> b) -> a -> b
$ Word8
w8

-- | Wrapper for calling 'unsafesShiftByteString' safely. Specifically, we avoid various edge cases:
--
-- * Empty 'ByteString's and zero moves don't do anything
-- * Bit moves whose absolute value is larger than the bit length produce all-zeroes
--
-- This also ensures we don't accidentally hit integer overflow issues.
shiftByteString :: ByteString -> Integer -> ByteString
shiftByteString :: ByteString -> Integer -> ByteString
shiftByteString ByteString
bs Integer
bitMove
  | ByteString -> Bool
BS.null ByteString
bs = ByteString
bs
  | Integer
bitMove Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = ByteString
bs
  | Bool
otherwise = let len :: Int
len = ByteString -> Int
BS.length ByteString
bs
                    bitLen :: Integer
bitLen = Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Integer) -> Int -> Integer
forall a b. (a -> b) -> a -> b
$ Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
len
                  in if Integer -> Integer
forall a. Num a => a -> a
abs Integer
bitMove Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
>= Integer
bitLen
                     then Int -> Word8 -> ByteString
BS.replicate Int
len Word8
0x00
                     -- fromIntegral is safe to use here, as the only way this
                     -- could overflow (or underflow) an Int is if we had a
                     -- ByteString onchain that was over 30 petabytes in size.
                     else ByteString -> Int -> ByteString
unsafeShiftByteString ByteString
bs (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
bitMove)

-- | Wrapper for calling 'unsafeRotateByteString' safely. Specifically, we avoid various edge cases:
--
-- * Empty 'ByteString's and zero moves don't do anything
-- * Bit moves whose absolute value is larger than the bit length are reduced modulo the length
--
-- Furthermore, we can convert all rotations into positive rotations, by noting that a rotation by @b@
-- is the same as a rotation by @b `mod` bitLen@, where @bitLen@ is the length of the 'ByteString'
-- argument in bits. This value is always non-negative, and if we get 0, we have nothing to do. This
-- reduction also helps us avoid integer overflow issues.
rotateByteString :: ByteString -> Integer -> ByteString
rotateByteString :: ByteString -> Integer -> ByteString
rotateByteString ByteString
bs Integer
bitMove
  | ByteString -> Bool
BS.null ByteString
bs = ByteString
bs
  | Bool
otherwise = let bitLen :: Integer
bitLen = Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Integer) -> Int -> Integer
forall a b. (a -> b) -> a -> b
$ Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
* ByteString -> Int
BS.length ByteString
bs
                    -- This is guaranteed non-negative
                    reducedBitMove :: Integer
reducedBitMove = Integer
bitMove Integer -> Integer -> Integer
forall a. Integral a => a -> a -> a
`mod` Integer
bitLen
                  in if Integer
reducedBitMove Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
                     then ByteString
bs
                     -- fromIntegral is safe to use here since for a bytestring to have a
                     -- size that doesn't fit into an `Int` it would have to have a size
                     -- exceeding something like 37 petabytes.
                     else ByteString -> Int -> ByteString
unsafeRotateByteString ByteString
bs (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
reducedBitMove)

{- Note [Shift and rotation implementation]

Both shifts and rotations work similarly: they effectively impose a 'write
offset' to bits in the data argument, then write those bits to the result with
this offset applied. The difference between them is in what should be done if
the resulting offset index would fall out of bounds: shifts just discard the
data (and fill whatever remains with zeroes), while rotations 'wrap around'
modularly. This operation is bit parallel by definition, thus theoretically
making it amenable to the techniques described in Note [Loop sectioning].

However, the naive way of doing this runs into a problem: the byte ordering
on Tier 1 platforms inside `Word64` means that consecutive bit indexes
according to CIP-122 don't remain that way. We could avoid this by using a
byte flip followed by an adjustment in the opposite direction, then a byte flip
back again. However, this is a costly operation, and would also be extremely
fiddly across stride boundaries, making both performance and implementation
clarity suffer. Instead, we use a different observation, namely that both
shifts and rotations on the same input are monoidally homomorphic into
natural number addition (assuming the same 'direction' for shifts). Using
this, combined with Euclidean division, we can decompose any shift or
rotation by `i` into two consecutive shifts and rotations:

1. A 'large' shift or rotation, by `div i 8`; and
2. A 'small' shift or rotation, by `mod i 8`.

While on paper, this seems much less efficient (as our stride is smaller),
we also observe that the 'large' shift moves around whole bytes, while also
keeping consecutive bytes consecutive, assuming their bit indices remain
in-bounds. This means that we can implement step 1 both simply and efficiently:

* For shifts, we perform a partial copy of all the bytes whose bits remain
  in-bounds, followed by clearing of whatever remains.
* For rotations, we perform two partial copies: first of all the bytes whose
  bits remain in-bounds, followed by whatever remains, at the 'opposite end'.

These can make use of the bulk copying and clearing operations provided by the
GHC runtime. Not only are these shorter and more readable, they are also _far_
more efficient than anything we could do, as they rely on optimized C called
via the runtime (meaning no FFI penalty). From our experiments, both with
these operations, and others from CIP-122, we note that the cost of these is
essentially constant up to about the size of 1-2 cache lines (64-128 bytes):
since we anticipate smaller inputs are far more likely, this actually runs
_faster_ than our proposed sectioning approach, while being easier to read
and write.

It is arguable that our approach forces 'double writing', as Step 2 has to
possibly overwrite our work in Step 1. However, by avoiding the need to
perform byte flips, as well as benefitting from the huge speedups gained
from our split approach, this cost is essentially negligible, especially
given that we can operate mutably throughout. We also have an additional
benefit: if the requested rotation or shift happens to be an exact multiple
of 8, we can be _much_ faster, as Step 2 becomes unnecessary in that case.
-}

-- | Shifts, as per [CIP-123](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0123).
-- This may not actually be unsafe, but it shouldn't be used outside this module.
unsafeShiftByteString :: ByteString -> Int -> ByteString
unsafeShiftByteString :: ByteString -> Int -> ByteString
unsafeShiftByteString ByteString
bs Int
bitMove = IO ByteString -> ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO ByteString -> ByteString)
-> ((Ptr CChar -> IO ByteString) -> IO ByteString)
-> (Ptr CChar -> IO ByteString)
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
bs ((Ptr CChar -> IO ByteString) -> ByteString)
-> (Ptr CChar -> IO ByteString) -> ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
srcPtr ->
      Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
len ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dstPtr -> do
        -- To simplify our calculations, we work only with absolute values,
        -- letting different functions control for direction, instead of
        -- trying to unify the scheme for both positive and negative shifts.
        let magnitude :: Int
magnitude = Int -> Int
forall a. Num a => a -> a
abs Int
bitMove
        -- Instead of worrying about partial clearing, we just zero the entire
        -- block of memory, as the cost is essentially negligible and saves us
        -- a bunch of offset arithmetic.
        Ptr Word8 -> Word8 -> Int -> IO ()
forall a. Ptr a -> Word8 -> Int -> IO ()
fillBytes Ptr Word8
dstPtr Word8
0x00 Int
len
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Int
magnitude Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
bitLen) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
          let (Int
bigShift, Int
smallShift) = Int
magnitude Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
          case Int -> Int
forall a. Num a => a -> a
signum Int
bitMove of
            (-1) -> Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
negativeShift (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr) Ptr Word8
dstPtr Int
bigShift Int
smallShift
            Int
_    -> Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
positiveShift (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr) Ptr Word8
dstPtr Int
bigShift Int
smallShift
  where
    len :: Int
    !len :: Int
len = ByteString -> Int
BS.length ByteString
bs
    bitLen :: Int
    !bitLen :: Int
bitLen = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8
    negativeShift :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
    negativeShift :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
negativeShift Ptr Word8
srcPtr Ptr Word8
dstPtr Int
bigShift Int
smallShift = do
      let copyDstPtr :: Ptr b
copyDstPtr = Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
dstPtr Int
bigShift
      let copyLen :: Int
copyLen = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bigShift
      -- Since we already zeroed everything, we only do the partial copy.
      Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
forall {b}. Ptr b
copyDstPtr Ptr Word8
srcPtr Int
copyLen
      Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
smallShift Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
        -- When working with the small shift, we have to shift bits across
        -- byte boundaries. Thus, we have to make sure that:
        --
        -- 1. We 'save' our first byte from being processed.
        -- 2. We can 'select' the bits that would be shifted over the
        --    boundary and apply them.
        let !invSmallShift :: Int
invSmallShift = Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
smallShift
        let !mask :: Word8
mask = Word8
0xFF Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftR` Int
invSmallShift
        [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1, Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2 .. Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
copyLen] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
byteIx -> do
          -- To handle shifts across byte boundaries, we have to 'read
          -- backwards', mask off the relevant part, then recombine.
          !(Word8
currentByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr Int
byteIx
          !(Word8
prevByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr (Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
          let !prevOverflowBits :: Word8
prevOverflowBits = Word8
prevByte Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..&. Word8
mask
          let !newCurrentByte :: Word8
newCurrentByte =
                (Word8
currentByte Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftR` Int
smallShift)
                  Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..|. (Word8
prevOverflowBits Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
invSmallShift)
          Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
dstPtr Int
byteIx Word8
newCurrentByte
        !(Word8
firstByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
copyLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
dstPtr (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
copyLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Word8
firstByte Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftR` Int
smallShift)
    -- This works similarly to `negativeShift` above, but in the opposite direction.
    positiveShift :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
    positiveShift :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
positiveShift Ptr Word8
srcPtr Ptr Word8
dstPtr Int
bigShift Int
smallShift = do
      let copySrcPtr :: Ptr b
copySrcPtr = Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
srcPtr Int
bigShift
      let copyLen :: Int
copyLen = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bigShift
      Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
dstPtr Ptr Word8
forall {b}. Ptr b
copySrcPtr Int
copyLen
      Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
smallShift Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
        let !invSmallShift :: Int
invSmallShift = Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
smallShift
        let !mask :: Word8
mask = Word8
0xFF Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
invSmallShift
        [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0, Int
1 .. Int
copyLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
byteIx -> do
          !(Word8
currentByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr Int
byteIx
          !(Word8
nextByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr (Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          let !nextOverflowBits :: Word8
nextOverflowBits = Word8
nextByte Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..&. Word8
mask
          let !newCurrentByte :: Word8
newCurrentByte =
                (Word8
currentByte Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
smallShift)
                  Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..|. (Word8
nextOverflowBits Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftR` Int
invSmallShift)
          Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
dstPtr Int
byteIx Word8
newCurrentByte
        !(Word8
lastByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr (Int
copyLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
dstPtr (Int
copyLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Word8
lastByte Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
smallShift)

-- | Rotations, as per [CIP-123](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0123).
-- This is defintely unsafe: calling it with bitMove = minBound::Int can cause a
-- segmentation fault.  It must not be used outside this module.
unsafeRotateByteString :: ByteString -> Int -> ByteString
unsafeRotateByteString :: ByteString -> Int -> ByteString
unsafeRotateByteString ByteString
bs Int
bitMove = IO ByteString -> ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO ByteString -> ByteString)
-> ((Ptr CChar -> IO ByteString) -> IO ByteString)
-> (Ptr CChar -> IO ByteString)
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (Ptr CChar -> IO ByteString) -> IO ByteString
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
bs ((Ptr CChar -> IO ByteString) -> ByteString)
-> (Ptr CChar -> IO ByteString) -> ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
srcPtr ->
  Int -> (Ptr Word8 -> IO ()) -> IO ByteString
BSI.create Int
len ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dstPtr -> do
    -- The move is guaranteed positive and reduced already. Thus, unlike for
    -- shifts, we don't need two variants for different directions.
    let (Int
bigRotation, Int
smallRotation) = Int
bitMove Int -> Int -> (Int, Int)
forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
8
    Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
go (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr) Ptr Word8
dstPtr Int
bigRotation Int
smallRotation
  where
    len :: Int
    !len :: Int
len = ByteString -> Int
BS.length ByteString
bs
    go :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
    go :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
go Ptr Word8
srcPtr Ptr Word8
dstPtr Int
bigRotate Int
smallRotate = do
      -- Two partial copies are needed here, unlike with shifts, because
      -- there's no point zeroing our data, since it'll all be overwritten
      -- with stuff from the input anyway.
      let copyStartSrcPtr :: Ptr b
copyStartSrcPtr = Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
srcPtr Int
bigRotate
      let copyStartLen :: Int
copyStartLen = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bigRotate
      Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
dstPtr Ptr Word8
forall {b}. Ptr b
copyStartSrcPtr Int
copyStartLen
      let copyEndDstPtr :: Ptr b
copyEndDstPtr = Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
dstPtr Int
copyStartLen
      Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Ptr a -> Ptr a -> Int -> IO ()
copyBytes Ptr Word8
forall {b}. Ptr b
copyEndDstPtr Ptr Word8
srcPtr Int
bigRotate
      Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
smallRotate Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
        -- This works similarly to shifts
        let !invSmallRotate :: Int
invSmallRotate = Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
smallRotate
        let !mask :: Word8
mask = Word8
0xFF Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
invSmallRotate
        !(Word8
cloneFirstByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr Int
0
        [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ [Int
0, Int
1 .. Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
byteIx -> do
          !(Word8
currentByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr Int
byteIx
          !(Word8
nextByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr (Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          let !nextOverflowBits :: Word8
nextOverflowBits = Word8
nextByte Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..&. Word8
mask
          let !newCurrentByte :: Word8
newCurrentByte =
                (Word8
currentByte Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
smallRotate)
                  Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..|. (Word8
nextOverflowBits Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftR` Int
invSmallRotate)
          Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
dstPtr Int
byteIx Word8
newCurrentByte
        !(Word8
lastByte :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
dstPtr (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        let !firstOverflowBits :: Word8
firstOverflowBits = Word8
cloneFirstByte Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..&. Word8
mask
        let !newLastByte :: Word8
newLastByte =
              (Word8
lastByte Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftL` Int
smallRotate)
                Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
Bits..|. (Word8
firstOverflowBits Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`Bits.unsafeShiftR` Int
invSmallRotate)
        Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
dstPtr (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Word8
newLastByte

-- | Counting the number of set bits, as per [CIP-123](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0123).
countSetBits :: ByteString -> Int
countSetBits :: ByteString -> Int
countSetBits ByteString
bs = IO Int -> Int
forall a. IO a -> a
unsafeDupablePerformIO (IO Int -> Int)
-> ((Ptr CChar -> IO Int) -> IO Int)
-> (Ptr CChar -> IO Int)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (Ptr CChar -> IO Int) -> IO Int
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
bs ((Ptr CChar -> IO Int) -> Int) -> (Ptr CChar -> IO Int) -> Int
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
srcPtr -> do
  -- See Note [Loop sectioning] for details of why we
  -- define this function the way it is. We make use of the fact that `popCount`
  -- is bit-parallel, and has a constant-time implementation for `Word64` and `Word8`.
  let Ptr Word64
bigSrcPtr :: Ptr Word64 = Ptr CChar -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr
  let Ptr Word8
smallSrcPtr :: Ptr Word8 = Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
srcPtr Int
offset
  Ptr Word64 -> Ptr Word8 -> Int -> Int -> IO Int
goBig Ptr Word64
bigSrcPtr Ptr Word8
smallSrcPtr Int
0 Int
0
  where
    len :: Int
    !len :: Int
len = ByteString -> Int
BS.length ByteString
bs
    -- We do this as two separate bindings, for similar reasons as for
    -- `integerToByteString`: we take a surprising hit to performance when
    -- using a pair, even though eliminating it should be possible here.
    bigStrides :: Int
    !bigStrides :: Int
bigStrides = Int
len Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
8
    smallStrides :: Int
    !smallStrides :: Int
smallStrides = Int
len Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
8
    offset :: Int
    !offset :: Int
offset = Int
bigStrides Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8
    goBig :: Ptr Word64 -> Ptr Word8 -> Int -> Int -> IO Int
    goBig :: Ptr Word64 -> Ptr Word8 -> Int -> Int -> IO Int
goBig !Ptr Word64
bigSrcPtr !Ptr Word8
smallSrcPtr !Int
acc !Int
bigIx
      | Int
bigIx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
bigStrides = Ptr Word8 -> Int -> Int -> IO Int
goSmall Ptr Word8
smallSrcPtr Int
acc Int
0
      | Bool
otherwise = do
          !Word64
w64 <- Ptr Word64 -> Int -> IO Word64
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word64
bigSrcPtr Int
bigIx
          Ptr Word64 -> Ptr Word8 -> Int -> Int -> IO Int
goBig Ptr Word64
bigSrcPtr Ptr Word8
smallSrcPtr (Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word64 -> Int
forall a. Bits a => a -> Int
Bits.popCount Word64
w64) (Int
bigIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    goSmall :: Ptr Word8 -> Int -> Int -> IO Int
    goSmall :: Ptr Word8 -> Int -> Int -> IO Int
goSmall !Ptr Word8
smallSrcPtr !Int
acc !Int
smallIx
      | Int
smallIx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
smallStrides = Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
acc
      | Bool
otherwise = do
          !Word8
w8 <- Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
smallSrcPtr Int
smallIx
          Ptr Word8 -> Int -> Int -> IO Int
goSmall Ptr Word8
smallSrcPtr (Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a. Bits a => a -> Int
Bits.popCount Word8
w8) (Int
smallIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

-- | Finding the first set bit's index, as per [CIP-123](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0123).
findFirstSetBit :: ByteString -> Int
findFirstSetBit :: ByteString -> Int
findFirstSetBit ByteString
bs = IO Int -> Int
forall a. IO a -> a
unsafeDupablePerformIO (IO Int -> Int)
-> ((Ptr CChar -> IO Int) -> IO Int)
-> (Ptr CChar -> IO Int)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> (Ptr CChar -> IO Int) -> IO Int
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
bs ((Ptr CChar -> IO Int) -> Int) -> (Ptr CChar -> IO Int) -> Int
forall a b. (a -> b) -> a -> b
$ \Ptr CChar
srcPtr -> do
  let Ptr Word64
bigSrcPtr :: Ptr Word64 = Ptr CChar -> Ptr Word64
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
srcPtr
  Ptr Word64 -> Int -> Int -> IO Int
goBig Ptr Word64
bigSrcPtr Int
0 (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
8)
  where
    -- We implement this operation in a somewhat unusual way, to try and
    -- benefit from bit paralellism, thus allowing loop sectioning as well:
    -- see Note [Loop sectioning] as to why we choose to
    -- do this.
    --
    -- Finding the first set bit is not (inherently) bit parallel, as there is
    -- a clear 'horizontal dependency'. Thus, we instead 'localize' this
    -- 'horizontal dependency' by noting that the following operations _are_
    -- bit-parallel:
    --
    -- 1. Checking if all bits are zero
    -- 2. Keeping an additive accumulator
    --
    -- Essentially, we begin by taking large steps through our data, checking
    -- whether we only have zeroes. This can be done in strides of 64 bits at a
    -- time, and every time we find that many zeroes, we keep track. After we
    -- encounter a nonzero `Word64`, we 'step down' to `Word8`-sized steps,
    -- continuing to count zero blocks the same way. Once we encounter a
    -- non-zero `Word8`, we can resort to the specialized operation for
    -- counting trailing zeroes from `Data.Bits`, and 'top up' our accumulated
    -- count to produce the index we want. If we ever 'walk off the end', we
    -- know that there's no way we could find any set bits and return -1.
    --
    -- This is complicated slightly by us having to walk the input backwards
    -- instead of forwards, but due to the requirements of the CIP-122 bit
    -- indexing scheme, we don't really have a choice here. This doesn't
    -- affect the description above however: it just complicates the indexing
    -- maths required.
    goBig :: Ptr Word64 -> Int -> Int -> IO Int
    goBig :: Ptr Word64 -> Int -> Int -> IO Int
goBig !Ptr Word64
bigSrcPtr !Int
acc !Int
byteIx
        -- We can do at least one large step. This works because we read
        -- backwards, which means that `byteIx` is the _last_ position we read
      | Int
byteIx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = do
          !(Word64
w64 :: Word64) <- Ptr Word64 -> Int -> IO Word64
forall b. Ptr b -> Int -> IO Word64
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word64
bigSrcPtr Int
byteIx
          -- In theory, we could use the same technique here as we do in
          -- `goSmall`, namely count speculatively and then compare to 64.
          -- However this is not possible for us, as the native byte ordering
          -- on Tier 1 platforms does not keep consecutive bits _across_ bytes
          -- consecutive, which would make this result unreliable. While we
          -- _could_ do a byte order flip before counting (from the opposite
          -- end) to avoid this, the cost of this operation is much larger
          -- than a comparison to zero, and would only benefit us _once_,
          -- instead of once-per-stride. Thus, we instead use the approach
          -- here.
          if Word64
w64 Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0x0
            then Ptr Word64 -> Int -> Int -> IO Int
goBig Ptr Word64
bigSrcPtr (Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
64) (Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
8)
            else Ptr Word8 -> Int -> Int -> IO Int
goSmall (Ptr Word64 -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr Word64
bigSrcPtr) Int
acc (Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7)
        -- We've 'walked off the end' and not found anything, so everything
        -- must be zeroes
      | Int
byteIx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= (-Int
8) = Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (-Int
1)
        -- We can end up here in one of two ways:
        --
        -- 1. Our input `ByteString` is 7 bytes long or smaller; or
        -- 2. We have done all the large steps we can, and have between 1
        --    and 7 bytes to go.
        --
        -- In either case, we forward the accumulator (which will be 0 in
        -- case 1) to small stepping. Combining these cases allows us to
        -- avoid separate tests for these conditions.
      | Bool
otherwise = Ptr Word8 -> Int -> Int -> IO Int
goSmall (Ptr Word64 -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr Word64
bigSrcPtr) Int
acc (Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    goSmall :: Ptr Word8 -> Int -> Int -> IO Int
    goSmall :: Ptr Word8 -> Int -> Int -> IO Int
goSmall !Ptr Word8
smallSrcPtr !Int
acc !Int
byteIx
      | Int
byteIx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (-Int
1)
      | Bool
otherwise = do
          !(Word8
w8 :: Word8) <- Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
smallSrcPtr Int
byteIx
          -- Instead of redundantly first checking for a zero byte,
          -- then counting, we speculatively count, relying on the behaviour of
          -- `countTrailingZeros` that, on a zero byte, we get 8.
          let !counted :: Int
counted = Word8 -> Int
forall b. FiniteBits b => b -> Int
Bits.countTrailingZeros Word8
w8
          let !newAcc :: Int
newAcc = Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
counted
          if Int
counted Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
8
            then Ptr Word8 -> Int -> Int -> IO Int
goSmall Ptr Word8
smallSrcPtr Int
newAcc (Int
byteIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
            else Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
newAcc
    len :: Int
    !len :: Int
len = ByteString -> Int
BS.length ByteString
bs

-- Helpers

{- Note [writeBits and exceptions]

   As `writeBits` allows us to pass a changelist argument of any length, we
   potentially could have an out-of-bounds index anywhere in the list. As we
   have to fail on such cases (and report them appropriately), we end up needing
   _both_ IO (to do mutable things) as well as a way to signal errors. We can
   do this in two ways:

   1. Pre-scan the changelist for any out-of-bounds indexes, fail if we see any,
      then apply the necessary changes if no out-of-bounds indexes are found.
   2. Speculatively allocate the new `ByteString`, then do the changes in the
      changelist argument one at a time, failing as soon as we see an out-of-bounds
      index.

  Option 1 would require traversing the changelist argument twice, which is
  undesirable, which means that option 2 is the more efficient choice. The
  natural choice for option 2 would be something similar to `ExceptT Int IO`
  (with the `Int` being an out-of-bounds index). However, we aren't able to do
  this, as ultimately, `ByteString`s are implemented as `ForeignPtr`s, forcing
  us to use the following function to interact with them, directly or not:

  withForeignPtr :: forall (a :: Type) . ForeignPtr a -> (Ptr a -> IO b) -> IO b

  Notably, the function argument produces a result of `IO b`, whereas we would
  need `MonadIO m => m b` instead. This means that our _only_ choice is to
  use the exception mechanism, either directly or via some wrappers like
  `MonadUnliftIO`. While this is unusual, and arguably against the spirit of
  the use of `IO` relative `ByteString` construction, we don't have any other
  choice. We decided to use the exception mechanism directly, as while
  `MonadUnliftIO` is a bit cleaner, it ultimately ends up doing the same thing
  anyway, and this method at least makes it clear what we're doing.

  This doesn't pose any problems from the point of view of Plutus Core, as this
  exception cannot 'leak'; we handle it entirely within `writeBits`, and no
  other Plutus Core code can ever see it.
-}
newtype WriteBitsException = WriteBitsException Integer
  deriving stock (WriteBitsException -> WriteBitsException -> Bool
(WriteBitsException -> WriteBitsException -> Bool)
-> (WriteBitsException -> WriteBitsException -> Bool)
-> Eq WriteBitsException
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: WriteBitsException -> WriteBitsException -> Bool
== :: WriteBitsException -> WriteBitsException -> Bool
$c/= :: WriteBitsException -> WriteBitsException -> Bool
/= :: WriteBitsException -> WriteBitsException -> Bool
Eq, Int -> WriteBitsException -> [Char] -> [Char]
[WriteBitsException] -> [Char] -> [Char]
WriteBitsException -> [Char]
(Int -> WriteBitsException -> [Char] -> [Char])
-> (WriteBitsException -> [Char])
-> ([WriteBitsException] -> [Char] -> [Char])
-> Show WriteBitsException
forall a.
(Int -> a -> [Char] -> [Char])
-> (a -> [Char]) -> ([a] -> [Char] -> [Char]) -> Show a
$cshowsPrec :: Int -> WriteBitsException -> [Char] -> [Char]
showsPrec :: Int -> WriteBitsException -> [Char] -> [Char]
$cshow :: WriteBitsException -> [Char]
show :: WriteBitsException -> [Char]
$cshowList :: [WriteBitsException] -> [Char] -> [Char]
showList :: [WriteBitsException] -> [Char] -> [Char]
Show)

instance Exception WriteBitsException

{- Note [Manual specialization]
For both integerToByteString and byteStringToInteger, we have to perform very
similar operations, but with small variations:

- Most-significant-first versus most-significant-last (for both)
- Whether we have a size limit or not (for integerToByteString)

Additionally, loop sectioning (see Note [Loop sectioning]) requires us to have
separate 'big-stride' and 'small-stride' operations to ensure universality of
input handling. Lastly, we have several subroutines (digit extraction, for
instance) that may vary in similar ways. In such a case, generalization by
means of abstraction seems like a good idea, as the operations (and
subroutines) vary little.

At the same time, to determine which variation of any given function (or
subroutine) we need, we only have to scrutinize the relevant argument(s) once:
these specifics (such as byte order) don't change during the course of the
operation. Thus, we want to make sure that these checks in the code are _also_
performed only once, ideally at the beginning.

However, if we write such operations naively as so:

> subroutine byteOrder arg1 arg2 = case byteOrder of
>       LittleEndian -> ...
>       BigEndian -> ...

the byteOrder argument will be scrutinized on each call of subroutine. This is
correct in general (as there is no guarantee that the argument will be stable).
Strangely, however, even in a case like this one:

> mainRoutine byteOrder arg1 arg2 = ...
>    where
>       subroutine arg3 = case byteOrder of
>           LittleEndian -> ...
>           BigEndian -> ...

GHC _still_ re-scrutinizes byteOrder in every call of subroutine! This penalty
can be somewhat lessened using a form similar to this:

> mainRoutine byteOrder arg1 arg2 = ...
>     where
>        !subroutine = case byteOrder of
>             LittleEndian -> \arg3 -> ...
>             BigEndian -> \arg3 -> ...

but this is _still_ between 20 and 30% worse than doing something like this:

> mainRoutine byteOrder arg1 arg2 = case byteOrder of
>     LittleEndian -> [code calling subroutineLE where needed]
>     BigEndian -> [code calling subroutineBE where needed]
>     where
>         subroutineLE arg3 = ...
>         subroutineBE arg3 = ...

This form _ensures_ we scrutinize (and branch) only the number of times we have
to, and in a predictable place. Since these are backends for Plutus Core primops,
and performance is thus critical, we choose to use this manually-specialized form
for each combination of relevant arguments. While this is repetitive, and thus
also somewhat error-prone, the performance penalty for not doing this is
unacceptable.
-}

{- Note [Loop sectioning]

Several operations in this module (including binary logical operations,
`integerToByteString` and `byteStringToInteger`) effectively function as loops
over fixed-width binary chunks: these can be bytes (for logical operations),
digits (for conversions) or something else. These chunks have to be read,
written or both, and may also require processing using fixed-width,
constant-time operations over those chunks from the Haskell side, in some
cases effectively 'translating' these fixed-size operations into variable-width
equivalents over `ByteString`s. In all cases, this involves trafficking data
between memory and machine registers (as `ByteString`s and `Integer`s are both
wrappers around counted arrays), as well as the overheads of looping
(involving comparison and branches). This trafficking is necessary not only to
move the memory around, but also to process it, as on modern architectures,
data must first be moved into a register in order to do anything with it.

However, on all architectures of interest (essentially, 64-bit Tier 1),
general-purpose registers (GPRs henceforth) are 64 bits (or 8 bytes).
Furthermore, the primary cost of moving data between memory and registers is
having to overcome the 'memory wall': the exact amount of data being moved
doesn't affect this very much. In addition to this, when we operate on single
bytes, the remaining 56 bits of the GPR holding that data are essentially
'wasted'. In the situation we have (namely, operating over arrays, whose data
is adjacent in memory), we thus get two sources of inefficiency:

* Despite paying the cost for a memory transfer, we transfer only one-eighth
  the data we could; and
* Despite transferring data from memory to registers, we utilize the register
  at only one-eighth capacity.

This essentially means we perform _eight times_ more rotations of the loop,
and memory moves, than we need to!

To avoid this, we use a technique known as _loop sectioning_.
Effectively, this transforms our homogenous loop (that always operates one byte at
a time) into a heterogenous loop: first, we operate on a larger section (called
a _stride_) until we can no longer do this, and then we finish up using byte
at a time processing. Essentially, when given an input like this:

[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ]

the homogeous byte-at-a-time approach would process it like so:

  _   _   _   _   _   _   _   _   _   _
[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ]

for a total of 10 memory transfers and 10 loop spins, whereas a loop-sectioned
approach with a stride of 8 would instead process like so:

  ______________________________  _   _
[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ]

Giving us only _three_ memory transfers and _three_ loop spins instead. This
effectively reduces our work by a factor of 8. In our cases, this is almost
free, as there is no data processing to be done: all we need to do is copy
from one place to another, essentially.

This technique only benefits us because counted arrays are cache-friendly: see
Note [Superscalarity and caching] for a longer explanation of this and why it
matters.

Further information:

- Tier 1 GHC platform list:
    https://gitlab.haskell.org/ghc/ghc/-/wikis/platforms#tier-1-platforms
- Memory wall:
    https://link.springer.com/referenceworkentry/10.1007/978-0-387-09766-4_234
- Loop sectioning in more detail:
    http://physics.ujep.cz/~zmoravec/prga/main_for/mergedProjects/optaps_for/common/optaps_vec_mine.htm
-}

{- Note [Superscalarity and caching]
On modern architectures, in order to process data, it must first be moved from
memory into a register. This operation has some cost (known as the 'memory wall'),
which is largely independent of how much data gets moved (assuming the register
can hold it): moving one byte, or a whole register's worth, costs about the same.
To reduce this cost, CPU manufacturers have introduced _cache hierarchies_,
which are designed to limit the cost of the wall, as long as the data access
matches the cache's optimal usage pattern. Thus, while an idealized view of
the memory hierachy is this:

Registers
---------
Memory

in reality, the view is more like this:

Registers
---------
L1 cache
---------
L2 cache
---------
L3 cache (on some platforms)
---------
Memory

Each 'higher' cache in the hierarchy is smaller, but faster, and when a memory
fetch is requested in code, in addition to moving the requested data to a
register, that data (plus some more) is moved into caches as well. The amount
of data moved into cache (a _cache line_) is typically eight machine words on
modern architectures (and definitely is the case for all Tier 1 GHC platforms):
for the cases concerning Plutus, that is 64 bytes. Therefore, if data we need
soon after a fetch is _physically_ nearby, it won't need to be fetched from
memory: instead, it would come from a cache, which is faster (by a considerable
margin).

To see how this can matter, consider the following ByteString:

[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]

The ByteString (being a counted array) has all of its data physically adjacent
to each other. Suppose we wanted to fetch the byte at index 1 (second position).
The naive view of what happens is like this:

Registers: [b2] [ ] [ ] .... [ ]
Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]

Thus, it would appear that, if we wanted a different position's value, we would
need to fetch from memory again. However, what _actually_ happens is more like this:

Registers: [b2] [ ] [ ] .... [ ]
L1 cache: [ b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]
Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]

We note that b2, as well as its adjacent elements, were _all_ pulled into the L1
cache. This can only work because all these elements are physically adjacent in
memory. The improvement in performance from this cache use is _very_ non-trivial:
an L1 cache is about 200 times faster than a memory access, and an L2 cache about
20 times faster.

To take further advantage of this, modern CPUs (and all Tier 1 GHC platforms have
this capability) are _superscalar_. To explain what this means, let's consider the
naive view of how CPUs execute instructions: namely, it is one-at-a-time, and
synchronous. While CPUs must give the _appearance_ that they behave this way, in
practice, CPU execution is very much asynchronous: due to the proliferation of ALUs
on a single chip, having twice as many processing units is much cheaper than having
processing units run twice as fast. Thus, if there are no data dependencies
between instructions, CPUs can (and do!) execute them simultaneously, stalling to
await results if a data dependency is detected. This can be done automatically
using Tomasulo's algorithm, which ensures no conflicts with maximum throughput.

Superscalarity interacts well with the cache hierarchy, as it makes data more
easily available for processing, provided there is enough 'work to do', and no
data dependencies. In our situation, most of what we do is data _movement_ from
one memory location to another, which by its very nature lacks any data
dependencies.

Further references:

- Numbers for cache and memory transfers: https://gist.github.com/jboner/2841832
- Superscalarity: https://en.wikipedia.org/wiki/Superscalar_processor
- Tomasulo's algorithm: https://en.wikipedia.org/wiki/Tomasulo%27s_algorithm
-}