Upgrade to Pro — share decks privately, control downloads, hide ads and more …

You might also like...

You might also like...

Think about Netflix, Spotify, or your favorite e-commerce site: a lot of the content we consume and the products we buy come from recommendations made by machines. Recommender systems, the mechanism behind those machines, have become increasingly popular over the past years to help us cope with information overload.

In this talk, I present how to leverage graph theory to build your own recommender system with JavaScript.

Maria Clara Santana

November 30, 2019
Tweet

More Decks by Maria Clara Santana

Other Decks in Programming

Transcript

  1. you might also like…
    maria clara @ jsconf japan 19

    View Slide

  2. hi!
    • developer @ Work & Co
    • former ML researcher
    • from Brazil
    • dog person

    View Slide

  3. View Slide

  4. human decisions are
    hard to predict

    View Slide

  5. https://scholar.google.com.br/scholar?hl=pt-BR&as_sdt=0%2C5&as_vis=1&q=human+decision+processes&btnG=

    View Slide

  6. divisive normalisation

    View Slide

  7. >
    =

    View Slide

  8. > >
    =

    View Slide

  9. > > >
    =

    View Slide

  10. https://poseidon01.ssrn.com/delivery.php?ID=020102027024075109127112065087016014122017071012062030101002076122113070067007120029056029020062102033001067099026089119126115015072091036076110092000001019070066067085066040012002065066000102114105116073101103114096084004126069067118065095020074003072&EXT=pdf

    View Slide

  11. how to choose?

    View Slide

  12. recommender systems

    View Slide

  13. https://mobilesyrup.com/2017/08/22/80-percent-netflix-shows-discovered-recommendation/
    80%

    View Slide

  14. https://www.netflixprize.com/

    View Slide

  15. 30%
    https://digital.hbs.edu/platform-rctom/submission/discover-weekly-how-spotify-is-changing-the-way-we-consume-music/

    View Slide

  16. how do these systems
    work?

    View Slide

  17. content-based filtering

    View Slide

  18. View Slide

  19. $\text{tf-idf}(t, D) = \text{tf}(t) \cdot \text{idf}(t, D)$

    View Slide

  20. class TfIdf {
    constructor(corpus = [], stopWords) {
    this.stopWords = stopWords
    || ['etc.', '-', 'that', 'my', 'you', 'now',
    'the', 'a', 'or', 'some', 'to', 'of', 'in',
    'is', 'for', 'and', 'had', 'but'];
    this.corpus = corpus.map(document =>
    this._parseDocument(document));
    }
    _parseDocument(document) {
    return document
    .split(' ')
    .map(word => word.toLowerCase())
    .filter(word => !this.stopWords.includes(word));
    }
    }

    View Slide

  21. class TfIdf {
    // constructor implementation;
    _reduceTerms(document) {
    return document.reduce((acc, word) => {
    if (!acc[word]) {
    acc[word] = 1;
    } else {
    acc[word] = acc[word] + 1;
    }
    return acc;
    }, {});
    }
    getTermsFrequency() {
    return this.corpus.map(document => {
    const docTerms = this._reduceTerms(document);
    return Object.keys(docTerms).map(term => {
    const appearances = docTerms[term];
    return { term, frequency: appearances / document.length };
    })
    });
    }
    }

    View Slide

  22. const corpus = [
    'All JavaScript frameworks are terrible',
    'Top 3 Best JavaScript Frameworks for 2019',
    'Microfrontends  —  bringing JavaScript frameworks together (React, Angular, Vue etc)',
    'JavaScript Frameworks, why and when to use them',
    'React/Redux Interview Questions',
    'Everything you need to know about change detection in Angular',
    'Here is what you need to know about dynamic components in Angular',
    'Why Angular 2 (4, 5, 6) sucks',
    'Webpack Tutorial: Understanding How it Works',
    '5 simple (?) algorithms for JavaScript Developers.',
    ];
    const tfIdf = new TfIdf(corpus);

    View Slide

  23. class TfIdf {
    // constructor implementation;
    _documentHasTerm(document, term) {
    // check if document contains term
    }
    getInvDocFrequency(term) {
    let occurence = 0;
    for (let i = 0; i < this.corpus.length; i++) {
    const doc = this.corpus[i];
    if (this._documentHasTerm(doc, term)) {
    occurence = occurence + 1;
    }
    }
    if (occurence === 0) {
    return undefined;
    }
    return Math.log(this.corpus.length / occurence);
    }
    }

    View Slide

  24. const termsF = tfIdf.getTermsFrequency();
    const docsInvF = termsF.map(item => {
    return item.map(tf => {
    const idf = tfIdf.getInvDocFrequency(tf.term);
    return { term: tf.term, frequency: tf.frequency, idf }
    });
    });
    const tfIdfValues = docsInvF.map(doc => {
    return doc.map(item => ({
    term: item.term,
    relevancy: item.frequency * item.idf
    }))
    });

    View Slide

  25. collaborative filtering

    View Slide

  26. View Slide

  27. |            | Super Mario Party | Super Mario Odyssey | Super Mario Kart | Super Mario Tennis |
    | Pikachu    | ?                 | 4                   | 5                | 3                  |
    | Charmander | 3                 | ?                   | 4                | ?                  |
    | Bulbassaur | 4                 | 5                   | 5                | ?                  |
    | Snorlax    | 5                 | 4                   | 5                | ?                  |

    View Slide

  28. |            | Super Mario Party | Super Mario Odyssey | Super Mario Kart | Super Mario Tennis |
    | Pikachu    | ?!                | 4                   | 5                | 3                  |
    | Charmander | 3                 | ?                   | 4                | ?                  |
    | Bulbassaur | 4                 | 5                   | 5                | ?                  |
    | Snorlax    | 5                 | 4                   | 5                | ?                  |

    View Slide

  29. let’s build a
    recommender engine!

    View Slide

  30. graph theory

    View Slide

  31. P O
    T
    K
    B
    S
    P
    C
    Weights omitted for readability purposes.

    View Slide

  32. G = (V, E)

    View Slide

  33. representing data with
    graphs

    View Slide

  34. export default class Graph {
    constructor(directed = false) {
    this.vertices = {};
    this.edges = {};
    this.directed = directed;
    }
    addVertex(vertex) {
    // add vertex implementation;
    }
    addEdge(edge) {
    // add edge implementation
    }
    }

    View Slide

  35. class Vertex {
    constructor(value) {
    if (value === undefined) {
    throw new Error('Vertex must have a value.');
    }
    const edgeComparator = (edgeA, edgeB) => {
    if (edgeA.getKey() === edgeB.getKey()) {
    return 0;
    }
    return edgeA.getKey() < edgeB.getKey() ? -1 : 1;
    };
    this.value = value;
    this.edges = new LinkedList(edgeComparator);
    }
    addEdge(edge) {
    this.edges.append(edge);
    return this;
    }
    }

    View Slide

  36. export default class Edge {
    constructor(startVertex, endVertex, weight = 0) {
    this.startVertex = startVertex;
    this.endVertex = endVertex;
    this.weight = weight;
    }
    getKey() {
    const startKey = this.startVertex.getKey();
    const endKey = this.endVertex.getKey();
    return `${startKey}_${endKey}`;
    }
    }

    View Slide

  37. addVertex(vertex) {
    this.vertices[vertex.getKey()] = vertex;
    return this;
    }

    View Slide

  38. addEdge(edge) {
    let startVertex =
    this.getVertexByKey(edge.startVertex.getKey());
    let endVertex =
    this.getVertexByKey(edge.endVertex.getKey());
    if (!startVertex) {
    this.addVertex(edge.startVertex);
    startVertex =
    this.getVertexByKey(edge.startVertex.getKey());
    }
    if (!endVertex) {
    this.addVertex(edge.endVertex);
    endVertex =
    this.getVertexByKey(edge.endVertex.getKey());
    }
    // next block;
    }

    View Slide

  39. addEdge(edge) {
    // prev block;
    if (this.edges[edge.getKey()]) {
    throw new Error('Edge has already been added before');
    } else {
    this.edges[edge.getKey()] = edge;
    }
    if (this.isDirected) {
    startVertex.addEdge(edge);
    } else {
    startVertex.addEdge(edge);
    endVertex.addEdge(edge);
    }
    return this;
    }

    View Slide

  40. const graph = new Graph();
    // char vertices
    const pikachu = new GraphVertex('pikachu');
    const charmander = new GraphVertex('charmander');
    const bulbassaur = new GraphVertex('bulbassaur');
    const snorlax = new GraphVertex('snorlax');
    // game vertices
    const party = new GraphVertex('party');
    const odyssey = new GraphVertex('odyssey');
    const kart = new GraphVertex('kart');
    const tennis = new GraphVertex('tennis');

    View Slide

  41. // rating edges
    const edgePikachuOdyssey =
    new GraphEdge(pikachu, odyssey, 4);
    const edgePikachuKart =
    new GraphEdge(pikachu, kart, 5);
    const edgeCharmanderParty =
    new GraphEdge(charmander, party, 3);
    const edgeCharmanderKart =
    new GraphEdge(charmander, kart, 4);
    const edgeCharmanderTennis =
    new GraphEdge(charmander, tennis);
    const edgeBulbassaurOdyssey =
    new GraphEdge(bulbassaur, odyssey, 5);
    const edgeBulbassaurTennis =
    new GraphEdge(bulbassaur, tennis, 5);
    const edgeSnorlaxParty =
    new GraphEdge(snorlax, party, 5);
    const edgeSnorlaxOdyssey =
    new GraphEdge(snorlax, odyssey, 4);
    const edgeSnorlaxKart =
    new GraphEdge(snorlax, kart, 5);

    View Slide

  42. graph
    .addEdge(edgePikachuOdyssey)
    .addEdge(edgePikachuKart)
    .addEdge(edgeCharmanderParty)
    .addEdge(edgeCharmanderKart)
    .addEdge(edgeCharmanderTennis)
    .addEdge(edgeBulbassaurOdyssey)
    .addEdge(edgeBulbassaurTennis)
    .addEdge(edgeSnorlaxParty)
    .addEdge(edgeSnorlaxOdyssey)
    .addEdge(edgeSnorlaxKart);

    View Slide

  43. [[Infinity, 4, 5, Infinity, Infinity, Infinity, Infinity, Infinity],
    [4, Infinity, Infinity, Infinity, Infinity, Infinity, 5, 4],
    [5, Infinity, Infinity, 4, Infinity, Infinity, Infinity, 5],
    [Infinity, Infinity, 4, Infinity, 3, 0, Infinity, Infinity],
    [Infinity, Infinity, Infinity, 3, Infinity, Infinity, Infinity, 5],
    [Infinity, Infinity, Infinity, 0, Infinity, Infinity, 5, Infinity],
    [Infinity, 5, Infinity, Infinity, Infinity, 5, Infinity, Infinity],
    [Infinity, 4, 5, Infinity, 5, Infinity, Infinity, Infinity]]

    View Slide

  44. the shortest-path
    problem

    View Slide

  45. http://www-m3.ma.tum.de/foswiki/pub/MN0506/WebHome/dijkstra.pdf

    View Slide

  46. function Dijkstra(Graph, source):
    create vertex set Q
    for each vertex v in Graph:
    dist[v] ← INFINITY
    prev[v] ← UNDEFINED
    add v to Q
    dist[source] ← 0
    while Q is not empty:
    u ← vertex in Q with min dist[u]
    remove u from Q
    for each neighbor v of u:
    alt ← dist[u] + length(u, v)
    if alt < dist[v]:
    dist[v]← alt
    prev[v]← u
    return dist[], prev[]

    View Slide

  47. export default function dijkstra(graph, startVertex) {
    const distances = {};
    const visitedVertices = {};
    const previousVertices = {};
    const queue = new PriorityQueue();
    return {
    distances,
    previousVertices,
    };
    }

    View Slide

  48. graph.getAllVertices().forEach((vertex) => {
    distances[vertex.getKey()] = Infinity;
    previousVertices[vertex.getKey()] = null;
    });
    distances[startVertex.getKey()] = 0;
    queue.add(startVertex, distances[startVertex.getKey()]);
    while (!queue.isEmpty()) {
    // loop implementation
    }

    View Slide

  49. const currentVertex = queue.poll();
    currentVertex.getNeighbors().forEach((neighbor) => {
    if (!visitedVertices[neighbor.getKey()]) {
    const edge = graph.findEdge(currentVertex, neighbor);
    const existingDistanceToNeighbor =
    distances[neighbor.getKey()];
    const distanceToNeighborFromCurrent =
    distances[currentVertex.getKey()] + edge.weight;
    // second block
    });
    visitedVertices[currentVertex.getKey()] = currentVertex;

    View Slide

  50. currentVertex.getNeighbors().forEach((neighbor) => {
    if (!visitedVertices[neighbor.getKey()]) {
    // first block
    if (distanceToNeighborFromCurrent
    < existingDistanceToNeighbor) {
    distances[neighbor.getKey()] = distanceToNeighborFromCurrent;
    if (queue.hasValue(neighbor)) {
    queue.changePriority(neighbor,
    distances[neighbor.getKey()]);
    }
    previousVertices[neighbor.getKey()] = currentVertex;
    }
    if (!queue.hasValue(neighbor)) {
    queue.add(neighbor, distances[neighbor.getKey()]);
    }
    }
    });
    visitedVertices[currentVertex.getKey()] = currentVertex;

    View Slide

  51. P O
    T
    K
    B
    S
    P
    C
    Weights omitted for readability purposes.

    View Slide

  52. {
    pikachu: 0,
    odyssey: 4,
    kart: 5,
    charmander: 9,
    party: 12,
    tennis: 9,
    bulbassaur: 9,
    snorlax: 8
    }
    {
    pikachu: 0,
    odyssey: 4,
    kart: 5,
    charmander: 9,
    party: 12,
    tennis: 9,
    bulbassaur: 9,
    snorlax: 8
    }
    {
    pikachu: 0,
    odyssey: 4,
    kart: 5,
    charmander: 9,
    party: 12,
    tennis: 9,
    bulbassaur: 9,
    snorlax: 8
    }

    View Slide

  53. is this scalable?

    View Slide

  54. https://pt.slideshare.net/planetcassandra/e-bay-nyc

    View Slide

  55. https://medium.com/pinterest-engineering/an-update-on-pixie-pinterests-recommendation-system-6f273f737e1b

    View Slide

  56. production-ready
    alternatives

    View Slide

  57. View Slide

  58. ethical concerns

    View Slide

  59. addiction

    View Slide

  60. privacy

    View Slide

  61. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.304.5839&rep=rep1&type=pdf

    View Slide

  62. thank you!
    https://olarclara.github.io

    View Slide