Slide 51
Slide 51 text
def _check_for_parallel_run_descrepancy(
self,
pinecone_results: list[PineconeEntry],
postgres_results: list[PostgresEntry]
):
if len(pinecone_results) != len(postgres_results):
self._record_discrepancy(
"different number of results",
{
"pinecone_count": len(pinecone_results),
"postgres_count": len(postgres_results),
},
)
# no point in continuing comparison if we're not comparing the same entries!
return
for pinecone_entry, postgres_entry in zip(pinecone_results, postgres_results):
# pinecone ids have a "pmatch:" prefix which we need to take into account when comparing
if pinecone_entry.coman_id != "pmatch:" + postgres_entry.coman_id:
self._record_discrepancy(
"different coman ids",
{
"pinecone_coman_id": pinecone_entry.coman_id,
"postgres_coman_id": postgres_entry.coman_id,
},
)
# no point in continuing comparison if we're not comparing the same entries!
continue
# a very small variance is possible due to the way that the vector stores calculate vector distance
if abs(pinecone_entry.similarity - postgres_entry.similarity) > 1e-5:
self._record_discrepancy(
"different similarity scores",
{
PARALLEL RUN, IRL