{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}

-- | Hashing of S3 objects
--
--   This module allows us to fetch objects from S3, taking advantage of S3's
--   support for CAS to avoid the need to calculate our own content hashes.
module Data.CAS.ContentHashable.S3 where

import qualified Aws
import qualified Aws.S3 as S3
import Control.Monad ((>=>))
import Control.Monad.Trans.Resource (runResourceT)
import Data.Aeson
import Data.CAS.ContentHashable
import Data.Constraint
import Data.Reflection
import GHC.Generics (Generic)
import Network.HTTP.Conduit
  ( newManager,
    tlsManagerSettings,
  )

-- | Reference to an object in an S3 bucket
--
--   Objects can be referenced in a few ways, so this
--   type is parametrised over the object reference.
--   Currently, this is expected to be:
--   - S3.Object (alias for Text)
--   - S3.ObjectInfo
data ObjectInBucket obj = ObjectInBucket
  { ObjectInBucket obj -> Bucket
_oibBucket :: S3.Bucket,
    ObjectInBucket obj -> obj
_oibObject :: obj
  }
  deriving (Int -> ObjectInBucket obj -> ShowS
[ObjectInBucket obj] -> ShowS
ObjectInBucket obj -> String
(Int -> ObjectInBucket obj -> ShowS)
-> (ObjectInBucket obj -> String)
-> ([ObjectInBucket obj] -> ShowS)
-> Show (ObjectInBucket obj)
forall obj. Show obj => Int -> ObjectInBucket obj -> ShowS
forall obj. Show obj => [ObjectInBucket obj] -> ShowS
forall obj. Show obj => ObjectInBucket obj -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ObjectInBucket obj] -> ShowS
$cshowList :: forall obj. Show obj => [ObjectInBucket obj] -> ShowS
show :: ObjectInBucket obj -> String
$cshow :: forall obj. Show obj => ObjectInBucket obj -> String
showsPrec :: Int -> ObjectInBucket obj -> ShowS
$cshowsPrec :: forall obj. Show obj => Int -> ObjectInBucket obj -> ShowS
Show, (forall x. ObjectInBucket obj -> Rep (ObjectInBucket obj) x)
-> (forall x. Rep (ObjectInBucket obj) x -> ObjectInBucket obj)
-> Generic (ObjectInBucket obj)
forall x. Rep (ObjectInBucket obj) x -> ObjectInBucket obj
forall x. ObjectInBucket obj -> Rep (ObjectInBucket obj) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall obj x. Rep (ObjectInBucket obj) x -> ObjectInBucket obj
forall obj x. ObjectInBucket obj -> Rep (ObjectInBucket obj) x
$cto :: forall obj x. Rep (ObjectInBucket obj) x -> ObjectInBucket obj
$cfrom :: forall obj x. ObjectInBucket obj -> Rep (ObjectInBucket obj) x
Generic)

-- | A lens to _oibBucket
oibBucket :: Functor f => (S3.Bucket -> f S3.Bucket) -> ObjectInBucket obj -> f (ObjectInBucket obj)
oibBucket :: (Bucket -> f Bucket)
-> ObjectInBucket obj -> f (ObjectInBucket obj)
oibBucket Bucket -> f Bucket
f ObjectInBucket obj
oib = Bucket -> ObjectInBucket obj
rebuild (Bucket -> ObjectInBucket obj)
-> f Bucket -> f (ObjectInBucket obj)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bucket -> f Bucket
f (ObjectInBucket obj -> Bucket
forall obj. ObjectInBucket obj -> Bucket
_oibBucket ObjectInBucket obj
oib)
  where
    rebuild :: Bucket -> ObjectInBucket obj
rebuild Bucket
b = ObjectInBucket obj
oib {_oibBucket :: Bucket
_oibBucket = Bucket
b}

-- | A lens to _oibObject
oibObject :: Functor f => (a -> f b) -> ObjectInBucket a -> f (ObjectInBucket b)
oibObject :: (a -> f b) -> ObjectInBucket a -> f (ObjectInBucket b)
oibObject a -> f b
f ObjectInBucket a
oib = b -> ObjectInBucket b
forall obj. obj -> ObjectInBucket obj
rebuild (b -> ObjectInBucket b) -> f b -> f (ObjectInBucket b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> f b
f (ObjectInBucket a -> a
forall obj. ObjectInBucket obj -> obj
_oibObject ObjectInBucket a
oib)
  where
    rebuild :: obj -> ObjectInBucket obj
rebuild obj
o = ObjectInBucket a
oib {_oibObject :: obj
_oibObject = obj
o}

instance FromJSON (ObjectInBucket S3.Object)

instance ToJSON (ObjectInBucket S3.Object)

class ObjectReference a where
  objectReference :: a -> S3.Object

instance ObjectReference S3.Object where
  objectReference :: Bucket -> Bucket
objectReference = Bucket -> Bucket
forall a. a -> a
id

instance ObjectReference S3.ObjectInfo where
  objectReference :: ObjectInfo -> Bucket
objectReference = ObjectInfo -> Bucket
S3.objectKey

-- | An S3 object is hashable whenever we have sufficient configuration to
--   access said object. To deal with this, we use reflection to reify a value
--   (the AWS configuration) into a class constraint.
--   To use this instance, you must reify the value using 'give':
--   @
--     cfg <- Aws.baseConfiguration
--     give cfg $ contentHash s3object
--   @
--
--   Since S3 is already content hashed, we do not need to actually hash the
--   object ourselves. In fact, we avoid fetching the object, and only
--   request the metadata including the content hash.
--   We incorporate the bucket and name into this to give extra guarantees on
--   uniqueness, but we may be better abolishing this to deduplicate files
--   stored in multiple places.
instance
  (Given Aws.Configuration) =>
  ContentHashable IO (ObjectInBucket S3.Object)
  where
  contentHashUpdate :: Context SHA256 -> ObjectInBucket Bucket -> IO (Context SHA256)
contentHashUpdate Context SHA256
ctx ObjectInBucket Bucket
a =
    let s3cfg :: S3Configuration NormalQuery
s3cfg = S3Configuration NormalQuery
forall config. DefaultServiceConfiguration config => config
Aws.defServiceConfig :: S3.S3Configuration Aws.NormalQuery
     in do
          {- Set up a ResourceT region with an available HTTP manager. -}
          Manager
mgr <- ManagerSettings -> IO Manager
newManager ManagerSettings
tlsManagerSettings

          {- Create a request object with S3.getObject and run the request with pureAws. -}
          S3.GetObjectResponse {gorMetadata :: GetObjectResponse -> ObjectMetadata
S3.gorMetadata = ObjectMetadata
md} <-
            ResourceT IO GetObjectResponse -> IO GetObjectResponse
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT IO GetObjectResponse -> IO GetObjectResponse)
-> ResourceT IO GetObjectResponse -> IO GetObjectResponse
forall a b. (a -> b) -> a -> b
$
              Configuration
-> ServiceConfiguration GetObject NormalQuery
-> Manager
-> GetObject
-> ResourceT IO GetObjectResponse
forall r a.
Transaction r a =>
Configuration
-> ServiceConfiguration r NormalQuery
-> Manager
-> r
-> ResourceT IO a
Aws.pureAws Configuration
forall a. Given a => a
given S3Configuration NormalQuery
ServiceConfiguration GetObject NormalQuery
s3cfg Manager
mgr (GetObject -> ResourceT IO GetObjectResponse)
-> GetObject -> ResourceT IO GetObjectResponse
forall a b. (a -> b) -> a -> b
$
                Bucket -> Bucket -> GetObject
S3.getObject (ObjectInBucket Bucket -> Bucket
forall obj. ObjectInBucket obj -> Bucket
_oibBucket ObjectInBucket Bucket
a) (ObjectInBucket Bucket -> Bucket
forall obj. ObjectInBucket obj -> obj
_oibObject ObjectInBucket Bucket
a)

          (Context SHA256 -> Bucket -> IO (Context SHA256))
-> Bucket -> Context SHA256 -> IO (Context SHA256)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Context SHA256 -> Bucket -> IO (Context SHA256)
forall (m :: * -> *) a.
ContentHashable m a =>
Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate (ObjectInBucket Bucket -> Bucket
forall obj. ObjectInBucket obj -> Bucket
_oibBucket ObjectInBucket Bucket
a)
            (Context SHA256 -> IO (Context SHA256))
-> (Context SHA256 -> IO (Context SHA256))
-> Context SHA256
-> IO (Context SHA256)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (Context SHA256 -> Bucket -> IO (Context SHA256))
-> Bucket -> Context SHA256 -> IO (Context SHA256)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Context SHA256 -> Bucket -> IO (Context SHA256)
forall (m :: * -> *) a.
ContentHashable m a =>
Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate (ObjectInBucket Bucket -> Bucket
forall obj. ObjectInBucket obj -> obj
_oibObject ObjectInBucket Bucket
a)
            (Context SHA256 -> IO (Context SHA256))
-> (Context SHA256 -> IO (Context SHA256))
-> Context SHA256
-> IO (Context SHA256)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (Context SHA256 -> Bucket -> IO (Context SHA256))
-> Bucket -> Context SHA256 -> IO (Context SHA256)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Context SHA256 -> Bucket -> IO (Context SHA256)
forall (m :: * -> *) a.
ContentHashable m a =>
Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate (ObjectMetadata -> Bucket
S3.omETag ObjectMetadata
md)
            (Context SHA256 -> IO (Context SHA256))
-> Context SHA256 -> IO (Context SHA256)
forall a b. (a -> b) -> a -> b
$ Context SHA256
ctx

-- | Reified instance of the implication to allow us to use this as a
--   constraint.
instance
  (Given Aws.Configuration)
    :=> ContentHashable IO (ObjectInBucket S3.Object)
  where
  ins :: Given Configuration :- ContentHashable IO (ObjectInBucket Bucket)
ins = (Given Configuration =>
 Dict (ContentHashable IO (ObjectInBucket Bucket)))
-> Given Configuration
   :- ContentHashable IO (ObjectInBucket Bucket)
forall (a :: Constraint) (b :: Constraint). (a => Dict b) -> a :- b
Sub Given Configuration =>
Dict (ContentHashable IO (ObjectInBucket Bucket))
forall (a :: Constraint). a => Dict a
Dict

-- | When we already have `ObjectInfo` (because we have, for example, queried
--   the bucket), we can calculate the 'ContentHash' directly without recourse
--   do S3, because we already know the S3 hash.
instance Monad m => ContentHashable m (ObjectInBucket S3.ObjectInfo) where
  contentHashUpdate :: Context SHA256 -> ObjectInBucket ObjectInfo -> m (Context SHA256)
contentHashUpdate Context SHA256
ctx ObjectInBucket ObjectInfo
a =
    (Context SHA256 -> Bucket -> m (Context SHA256))
-> Bucket -> Context SHA256 -> m (Context SHA256)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Context SHA256 -> Bucket -> m (Context SHA256)
forall (m :: * -> *) a.
ContentHashable m a =>
Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate (ObjectInBucket ObjectInfo -> Bucket
forall obj. ObjectInBucket obj -> Bucket
_oibBucket ObjectInBucket ObjectInfo
a)
      (Context SHA256 -> m (Context SHA256))
-> (Context SHA256 -> m (Context SHA256))
-> Context SHA256
-> m (Context SHA256)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (Context SHA256 -> Bucket -> m (Context SHA256))
-> Bucket -> Context SHA256 -> m (Context SHA256)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Context SHA256 -> Bucket -> m (Context SHA256)
forall (m :: * -> *) a.
ContentHashable m a =>
Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate (ObjectInfo -> Bucket
S3.objectKey (ObjectInfo -> Bucket) -> ObjectInfo -> Bucket
forall a b. (a -> b) -> a -> b
$ ObjectInBucket ObjectInfo -> ObjectInfo
forall obj. ObjectInBucket obj -> obj
_oibObject ObjectInBucket ObjectInfo
a)
      (Context SHA256 -> m (Context SHA256))
-> (Context SHA256 -> m (Context SHA256))
-> Context SHA256
-> m (Context SHA256)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (Context SHA256 -> Bucket -> m (Context SHA256))
-> Bucket -> Context SHA256 -> m (Context SHA256)
forall a b c. (a -> b -> c) -> b -> a -> c
flip Context SHA256 -> Bucket -> m (Context SHA256)
forall (m :: * -> *) a.
ContentHashable m a =>
Context SHA256 -> a -> m (Context SHA256)
contentHashUpdate (ObjectInfo -> Bucket
S3.objectETag (ObjectInfo -> Bucket) -> ObjectInfo -> Bucket
forall a b. (a -> b) -> a -> b
$ ObjectInBucket ObjectInfo -> ObjectInfo
forall obj. ObjectInBucket obj -> obj
_oibObject ObjectInBucket ObjectInfo
a)
      (Context SHA256 -> m (Context SHA256))
-> Context SHA256 -> m (Context SHA256)
forall a b. (a -> b) -> a -> b
$ Context SHA256
ctx

-- | Reified instance of the implication to allow us to use this as a
--   constraint.
instance
  (Given Aws.Configuration)
    :=> ContentHashable IO (ObjectInBucket S3.ObjectInfo)
  where
  ins :: Given Configuration
:- ContentHashable IO (ObjectInBucket ObjectInfo)
ins = (Given Configuration =>
 Dict (ContentHashable IO (ObjectInBucket ObjectInfo)))
-> Given Configuration
   :- ContentHashable IO (ObjectInBucket ObjectInfo)
forall (a :: Constraint) (b :: Constraint). (a => Dict b) -> a :- b
Sub Given Configuration =>
Dict (ContentHashable IO (ObjectInBucket ObjectInfo))
forall (a :: Constraint). a => Dict a
Dict