-- | Result of a single step over a stream:
--   'Yield' carries an element together with the next cursor,
--   'Skip' carries only the next cursor (no element produced),
--   'Done' carries nothing.
-- The cursor fields are marked strict (@!@).
data Step data cursor = Yield data !cursor
| Skip !cursor
| Done
-- | A stream packages a step function with the cursor it starts from.
-- The cursor type is existentially quantified, so it is hidden from
-- consumers of the stream; the bound variable must therefore be the same
-- @cursor@ that the step function and the initial value use.
data Stream data =
  ∃cursor. Stream (cursor → Step data cursor) cursor
Slide 32
Slide 32 text
Stream Beginning:
reading from the DB
Slide 33
Slide 33 text
map
-- Step translation for map: f is applied to the yielded element;
-- the cursor is passed through unchanged in every branch.
Yield data cursor → Yield (f data) cursor
Skip cursor → Skip cursor
Done → Done
maps :: (a → b) → Stream a → Stream b
Slide 34
Slide 34 text
filter
-- Step translation for filter: an element satisfying the predicate p is
-- yielded unchanged; one that fails it becomes a Skip that keeps the cursor.
Yield data cursor | p data → Yield data cursor
| otherwise → Skip cursor
Skip cursor → Skip cursor
Done → Done
filters :: (a → Bool) → Stream a → Stream a
Slide 35
Slide 35 text
reduce/fold
-- Left fold over the stream. Here `data` is the running accumulator and `x`
-- the element just yielded. Once the stream is exhausted the accumulated
-- value itself is the result, so Done must return the same binding the
-- other branches thread through `loop`.
Yield x cursor → loop (f data x) cursor
Skip cursor → loop data cursor
Done → data
foldls :: (Monoid acc) => (acc → a → acc) → acc → Stream a → acc
Slide 36
Slide 36 text
Append
class Monoid a where
  mempty :: a
  -- ^ Identity of 'mappend'
  mappend :: a -> a -> a
  -- ^ An associative operation
Slide 37
Slide 37 text
-- | Relates an @intermediate@ aggregation state, which must be a 'Monoid',
-- to the @end@ type it is finally converted into by 'combine'.
class Monoid intermediate => Aggregate intermediate end where
  combine :: intermediate -> end
Combine
Slide 38
Slide 38 text
-- | Counter aggregate wrapping an 'Int'.
data Count = Count Int

-- | Counts start at zero and merge by addition.
instance Monoid Count where
  mempty = Count 0
  mappend (Count lhs) (Count rhs) = Count (lhs + rhs)

-- | The final result of a 'Count' is the wrapped 'Int'.
instance Aggregate Count Int where
  combine (Count total) = total
Count Example
Advantages
64 bits per 8-byte Long
Easy to represent as a long array using
offsets, bit shifts and masks
Easy to implement atomic in-memory operations
Slide 63
Slide 63 text
Count-min sketches
are basically int matrices
Slide 64
Slide 64 text
Histograms
are basically long vectors
Slide 65
Slide 65 text
Conclusions
Ad-hoc queries
Parallelism
Lightweight data-structure representations
Optimisations and good API fits