Slide 16
def new_vote(vote, foreground=False, timer=None):
    user = vote._thing1
    item = vote._thing2

    if timer is None:
        timer = SimpleSillyStub()

    if not isinstance(item, (Link, Comment)):
        return

    if vote.valid_thing and not item._spam and not item._deleted:
        sr = item.subreddit_slow
        results = []

        author = Account._byID(item.author_id)
        for sort in ('hot', 'top', 'controversial', 'new'):
            if isinstance(item, Link):
                results.append(get_submitted(author, sort, 'all'))
            if isinstance(item, Comment):
                results.append(get_comments(author, sort, 'all'))

        if isinstance(item, Link):
            # don't do 'new', because that was done by new_link, and
            # the time-filtered versions of top/controversial will be
            # done by mr_top
            results.extend([get_links(sr, 'hot', 'all'),
                            get_links(sr, 'top', 'all'),
                            get_links(sr, 'controversial', 'all')])

            parsed = utils.UrlParser(item.url)
            if parsed.hostname and not parsed.hostname.endswith('imgur.com'):
                for domain in parsed.domain_permutations():
                    for sort in ("hot", "top", "controversial"):
                        results.append(get_domain_links(domain, sort, "all"))

        add_queries(results, insert_items=item, foreground=foreground)

    timer.intermediate("permacache")

    if isinstance(item, Link):
        # must update both because we don't know if it's a changed
        # vote
        with CachedQueryMutator() as m:
            if vote._name == '1':
                m.insert(get_liked(user), [vote])
                m.delete(get_disliked(user), [vote])
            elif vote._name == '-1':
                m.delete(get_liked(user), [vote])
                m.insert(get_disliked(user), [vote])
            else:
                m.delete(get_liked(user), [vote])
                m.delete(get_disliked(user), [vote])
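The timer=None default is replaced with a null object so that the body can call timer.intermediate(...) unconditionally, whether or not the caller passed a real timer. A minimal sketch of what such a stub might look like (the real SimpleSillyStub lives in reddit's r2.lib.utils; this version is illustrative, not the actual implementation):

class SimpleSillyStub(object):
    """A null object: any method call on it silently does nothing."""
    def __getattr__(self, name):
        # only invoked when normal lookup fails, so every unknown
        # attribute (e.g. .intermediate) resolves to the no-op below
        return self.stub

    def stub(self, *args, **kwargs):
        pass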
def add_queries(queries, insert_items=None, delete_items=None, foreground=False):
    """Adds multiple queries to the query queue. If insert_items or
       delete_items is specified, the query may not need to be
       recomputed against the database."""
    for q in queries:
        if insert_items and q.can_insert():
            log.debug("Inserting %s into query %s" % (insert_items, q))
            if foreground:
                q.insert(insert_items)
            else:
                worker.do(q.insert, insert_items)
        elif delete_items and q.can_delete():
            log.debug("Deleting %s from query %s" % (delete_items, q))
            if foreground:
                q.delete(delete_items)
            else:
                worker.do(q.delete, delete_items)
        else:
            raise Exception("Cannot update query %r!" % (q,))

    # dual-write any queries that are being migrated to the new query cache
    with CachedQueryMutator() as m:
        new_queries = [getattr(q, 'new_query') for q in queries
                       if hasattr(q, 'new_query')]

        if insert_items:
            for query in new_queries:
                m.insert(query, tup(insert_items))

        if delete_items:
            for query in new_queries:
                m.delete(query, tup(delete_items))
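The dual-write block above supports migrating a listing from the old permacache-backed CachedResults to the newer CachedQuery store: during the transition, a legacy query object can carry a new_query attribute pointing at its replacement, and every insert/delete is mirrored into both. A hypothetical sketch of the wiring (new_query is the attribute the code actually checks; get_hot_links_cached_query is an invented name, and sr/link stand for a subreddit and link as in new_vote above):

# a legacy CachedResults still serving reads...
legacy = get_links(sr, 'hot', 'all')
# ...tagged with its CachedQuery replacement (hypothetical helper)
legacy.new_query = get_hot_links_cached_query(sr)
# writes now land in both the old and the new store
add_queries([legacy], insert_items=link)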
class CachedResults(object):
    """Given a query, returns a list-like object that will lazily look up
    the query from the persistent cache.
    """
    def __init__(self, query, filter):
        self.query = query
        self.query._limit = precompute_limit
        self.filter = filter
        self.iden = self.query._iden()
        self.sort_cols = [s.col for s in self.query._sort]
        self.data = []
        self._fetched = False

    @property
    def sort(self):
        return self.query._sort

    def fetch(self, force=False):
        """Loads the query from the cache."""
        self.fetch_multi([self], force=force)

    @classmethod
    def fetch_multi(cls, crs, force=False):
        unfetched = filter(lambda cr: force or not cr._fetched, crs)
        if not unfetched:
            return

        cached = query_cache.get_multi([cr.iden for cr in unfetched],
                                       allow_local=not force)
        for cr in unfetched:
            cr.data = cached.get(cr.iden) or []
            cr._fetched = True

    def make_item_tuple(self, item):
        """Given a single 'item' from the result of a query, build the tuple
        that will be stored in the query cache. It is effectively the
        fullname of the item after passing through the filter, plus the
        columns of the unfiltered item to sort by."""
        filtered_item = self.filter(item)
        lst = [filtered_item._fullname]
        for col in self.sort_cols:
            # take the property of the original
            attr = getattr(item, col)
            # convert dates to epochs to take less space
            if isinstance(attr, datetime):
                attr = epoch_seconds(attr)
            lst.append(attr)
        return tuple(lst)
    def can_insert(self):
        """True if a new item can just be inserted rather than rerunning
        the query."""
        # This is only true in some circumstances: queries where
        # eligibility in the list is determined only by its sort
        # value (e.g. hot) and where addition/removal from the list
        # incurs an insertion/deletion event called on the query. So
        # the top hottest items in some subreddit, where the query
        # is notified on every submission/banning/unbanning/deleting,
        # will work, but for queries with a time-component or some
        # other eligibility factor, it cannot be inserted this way.
        if self.query._sort in ([desc('_date')],
                                [desc('_hot'), desc('_date')],
                                [desc('_score'), desc('_date')],
                                [desc('_controversy'), desc('_date')]):
            if not any(r for r in self.query._rules
                       if r.lval.name == '_date'):
                # if no time-rule is specified, then it's 'all'
                return True
        return False

    def can_delete(self):
        "True if an item can be removed from the listing; always true for now."
        return True
    def _mutate(self, fn, willread=True):
        self.data = query_cache.mutate(self.iden, fn, default=[],
                                       willread=willread)
        self._fetched = True

    def insert(self, items):
        """Inserts the item into the cached data. This only works
           under certain criteria, see can_insert."""
        self._insert_tuples([self.make_item_tuple(item)
                             for item in tup(items)])

    def _insert_tuples(self, t):
        def _mutate(data):
            data = data or []

            # short-circuit if we already know that no item to be
            # added qualifies to be stored. Since we know that this is
            # sorted descending by datum[1:], we can just check the
            # last item and see if we're smaller than it is
            if (len(data) >= precompute_limit and
                all(x[1:] < data[-1][1:] for x in t)):
                return data

            # insert the new items, remove the duplicates (keeping the
            # one being inserted over the stored value if applicable),
            # and sort the result
            newfnames = set(x[0] for x in t)
            data = filter(lambda x: x[0] not in newfnames, data)
            data.extend(t)
            data.sort(reverse=True, key=lambda x: x[1:])
            if len(t) + len(data) > precompute_limit:
                data = data[:precompute_limit]
            return data

        self._mutate(_mutate)

    def delete(self, items):
        """Deletes an item from the cached data."""
        fnames = set(self.filter(x)._fullname for x in tup(items))

        def _mutate(data):
            data = data or []
            return filter(lambda x: x[0] not in fnames, data)

        self._mutate(_mutate)

    def _replace(self, tuples):
        """Take pre-rendered tuples from mr_top and replace the
           contents of the query outright. This should be considered
           a private API."""
        def _mutate(data):
            return tuples
        self._mutate(_mutate, willread=False)

    def update(self):
        """Runs the query and stores the result in the cache. This is
           only run by hand."""
        self.data = [self.make_item_tuple(i) for i in self.query]
        self._fetched = True
        query_cache.set(self.iden, self.data)
    def __repr__(self):
        return '<CachedResults %s %s>' % (self.query._rules, self.query._sort)
    def __iter__(self):
        self.fetch()

        for x in self.data:
            yield x[0]
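Putting the pieces together: each cached entry is a tuple of the item's fullname followed by its sort-column values (dates collapsed to epoch seconds), kept sorted descending, and iterating a CachedResults lazily fetches that list and yields only the fullnames. A hedged sketch of the round trip, where the query construction and the identity filter are assumptions for illustration rather than the exact code used elsewhere in queries.py:

# illustrative: a precomputed "newest links in a subreddit" listing
q = Link._query(Link.c.sr_id == sr._id, sort=desc('_date'))
cr = CachedResults(q, filter=lambda x: x)

cr.update()           # run the query by hand; stores tuples like
                      # ('t3_abc12', 1331942400.0)
fullnames = list(cr)  # readers just iterate: lazy fetch, fullnames only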
class MergedCachedResults(object):
    """Given two CachedResults, merges their lists based on the sorts
       of their queries."""
    # normally we'd do this by having a superclass of CachedResults,
    # but we have legacy pickled CachedResults that we don't want to
    # break
    def __init__(self, results):
        self.cached_results = results
        CachedResults.fetch_multi([r for r in results
                                   if isinstance(r, CachedResults)])
        CachedQuery._fetch_multi([r for r in results
                                  if isinstance(r, CachedQuery)])
        self._fetched = True

        self.sort = results[0].sort
        comparator = ThingTupleComparator(self.sort)
        # make sure they're all the same
        assert all(r.sort == self.sort for r in results[1:])

        all_items = []
        for cr in results:
            all_items.extend(cr.data)
        all_items.sort(cmp=comparator)
        self.data = all_items
https://github.com/reddit/reddit/blob/master/r2/r2/lib/db/queries.py