Upgrade to Pro — share decks privately, control downloads, hide ads and more …

You might also like...

You might also like...

Think about Netflix, Spotify, or your favorite e-commerce site: a lot of the content we consume and the products we buy come from recommendations made by machines. Recommender systems, the mechanisms behind those machines, have become increasingly popular over the past years to help us cope with information overload.

In this talk, I present how to leverage graph theory to build your own recommender system with JavaScript.

78b75aad21ed5ed351d9822d77acd7fc?s=128

Maria Clara Santana

November 30, 2019
Tweet

More Decks by Maria Clara Santana

Other Decks in Programming

Transcript

  1. you might also like… maria clara @ jsconf japan 19

  2. hi! • developer @ Work & Co • former ML

    researcher • from Brazil • dog person
  3. None
  4. human decisions are hard to predict

  5. https://scholar.google.com.br/scholar?hl=pt-BR&as_sdt=0%2C5&as_vis=1&q=human+decision+processes&btnG=

  6. divisive normalisation

  7. > =

  8. > > =

  9. > > > =

  10. https://poseidon01.ssrn.com/delivery.php?ID=020102027024075109127112065087016014122017071012062030101002076122113070067007120029056029020062102033001067099026089119126115015072091036076110092000001019070066067085066040012002065066000102114105116073101103114096084004126069067118065095020074003072&EXT=pdf

  11. how to choose?

  12. recommender systems

  13. https://mobilesyrup.com/2017/08/22/80-percent-netflix-shows-discovered-recommendation/ 80%

  14. https://www.netflixprize.com/

  15. 30% https://digital.hbs.edu/platform-rctom/submission/discover-weekly-how-spotify-is-changing-the-way-we-consume-music/

  16. how do these systems work?

  17. content-based filtering

  18. None
  19. $\text{tf-idf}(t, D) = \text{tf}(t) \cdot \text{idf}(t, D)$

  20. class TfIdf { constructor(corpus = [], stopWords) { this.stopWords =

    stopWords || ['etc.', '-', 'that', 'my', 'you', 'now', 'the', 'a', 'or', 'some', 'to', 'of', 'in', 'is', 'for', 'and', 'had', 'but']; this.corpus = corpus.map(document => this._parseDocument(document)); } _parseDocument(document) { return document .split(' ') .map(word => word.toLowerCase()) .filter(word => !this.stopWords.includes(word)); } }
  21. class TfIdf { // constructor implementation; _reduceTerms(document) { return document.reduce((acc,

    word) => { if (!acc[word]) { acc[word] = 1; } else { acc[word] = acc[word] + 1; } return acc; }, {}); } getTermsFrequency() { return this.corpus.map(document => { const docTerms = this._reduceTerms(document); return Object.keys(docTerms).map(term => { const appearances = docTerms[term]; return { term, frequency: appearances / document.length }; }) }); } }
  22. const corpus = [ 'All JavaScript frameworks are terrible', 'Top

    3 Best JavaScript Frameworks for 2019', 'Microfrontends  —  bringing JavaScript frameworks together (React, Angular, Vue etc)', 'JavaScript Frameworks, why and when to use them', 'React/Redux Interview Questions', 'Everything you need to know about change detection in Angular', 'Here is what you need to know about dynamic components in Angular', 'Why Angular 2 (4, 5, 6) sucks', 'Webpack Tutorial: Understanding How it Works', '5 simple (?) algorithms for JavaScript Developers.', ]; const tfIdf = new TfIdf(corpus);
  23. class TfIdf { // constructor implementation; _documentHasTerm(document, term) { //

    check if document contains term } getInvDocFrequency(term) { let occurence = 0; for (let i = 0; i < this.corpus.length; i++) { const doc = this.corpus[i]; if (this._documentHasTerm(doc, term)) { occurence = occurence + 1; } } if (occurence === 0) { return undefined; } return Math.log(this.corpus.length / occurence); } }
  24. const termsF = tfIdf.getTermsFrequency(); const docsInvF = termsF.map(item => {

    return item.map(tf => { const idf = tfIdf.getInvDocFrequency(tf.term); return { term: tf.term, frequency: tf.frequency, idf } }); }); const tfIdfValues = docsInvF.map(doc => { return doc.map(item => ({ term: item.term, relevancy: item.frequency * item.idf })) });
  25. collaborative filtering

  26. None
  27. Super Mario Party Super Mario Odyssey Super Mario Kart Super

    Mario Tennis Pikachu ? 4 5 3 Charmander 3 ? 4 ? Bulbassaur 4 5 5 ? Snorlax 5 4 5 ?
  28. Super Mario Party Super Mario Odyssey Super Mario Kart Super

    Mario Tennis Pikachu ?! 4 5 3 Charmander 3 ? 4 ? Bulbassaur 4 5 5 ? Snorlax 5 4 5 ?
  29. let’s build a recommender engine!

  30. graph theory

  31. P O T K B S P C Weights omitted

    for readability purposes.
  32. G = (V, E)

  33. representing data with graphs

  34. export default class Graph { constructor(directed = false) { this.vertices

    = {}; this.edges = {}; this.directed = directed; } addVertex(vertex) { // add vertex implementation; } addEdge(edge) { // add edge implementation } }
  35. class Vertex { constructor(value) { if (value === undefined) {

    throw new Error('Vertex must have a value.'); } const edgeComparator = (edgeA, edgeB) => { if (edgeA.getKey() === edgeB.getKey()) { return 0; } return edgeA.getKey() < edgeB.getKey() ? -1 : 1; }; this.value = value; this.edges = new LinkedList(edgeComparator); } addEdge(edge) { this.edges.append(edge); return this; } }
  36. export default class Edge { constructor(startVertex, endVertex, weight = 0)

    { this.startVertex = startVertex; this.endVertex = endVertex; this.weight = weight; } getKey() { const startKey = this.startVertex.getKey(); const endKey = this.endVertex.getKey(); return `${startKey}_${endKey}`; } }
  37. addVertex(vertex) { this.vertices[vertex.getKey()] = vertex; return this; }

  38. addEdge(edge) { let startVertex = this.getVertexByKey(edge.startVertex.getKey()); let endVertex = this.getVertexByKey(edge.endVertex.getKey());

    if (!startVertex) { this.addVertex(edge.startVertex); startVertex = this.getVertexByKey(edge.startVertex.getKey()); } if (!endVertex) { this.addVertex(edge.endVertex); endVertex = this.getVertexByKey(edge.endVertex.getKey()); } // next block; }
  39. addEdge(edge) { // prev block; if (this.edges[edge.getKey()]) { throw new

    Error('Edge has already been added before'); } else { this.edges[edge.getKey()] = edge; } if (this.isDirected) { startVertex.addEdge(edge); } else { startVertex.addEdge(edge); endVertex.addEdge(edge); } return this; }
  40. const graph = new Graph(); // char vertices const pikachu

    = new GraphVertex('pikachu'); const charmander = new GraphVertex('charmander'); const bulbassaur = new GraphVertex('bulbassaur'); const snorlax = new GraphVertex('snorlax'); // game vertices const party = new GraphVertex('party'); const odyssey = new GraphVertex('odyssey'); const kart = new GraphVertex('kart'); const tennis = new GraphVertex('tennis');
  41. // rating edges const edgePikachuOdyssey = new GraphEdge(pikachu, odyssey, 4);

    const edgePikachuKart = new GraphEdge(pikachu, kart, 5); const edgeCharmanderParty = new GraphEdge(charmander, party, 3); const edgeCharmanderKart = new GraphEdge(charmander, kart, 4); const edgeCharmanderTennis = new GraphEdge(charmander, tennis); const edgeBulbassaurOdyssey = new GraphEdge(bulbassaur, odyssey, 5); const edgeBulbassaurTennis = new GraphEdge(bulbassaur, tennis, 5); const edgeSnorlaxParty = new GraphEdge(snorlax, party, 5); const edgeSnorlaxOdyssey = new GraphEdge(snorlax, odyssey, 4); const edgeSnorlaxKart = new GraphEdge(snorlax, kart, 5);
  42. graph .addEdge(edgePikachuOdyssey) .addEdge(edgePikachuKart) .addEdge(edgeCharmanderParty) .addEdge(edgeCharmanderKart) .addEdge(edgeCharmanderTennis) .addEdge(edgeBulbassaurOdyssey) .addEdge(edgeBulbassaurTennis) .addEdge(edgeSnorlaxParty) .addEdge(edgeSnorlaxOdyssey)

    .addEdge(edgeSnorlaxKart);
  43. [[Infinity, 4, 5, Infinity, Infinity, Infinity, Infinity, Infinity], [4, Infinity,

    Infinity, Infinity, Infinity, Infinity, 5, 4], [5, Infinity, Infinity, 4, Infinity, Infinity, Infinity, 5], [Infinity, Infinity, 4, Infinity, 3, 0, Infinity, Infinity], [Infinity, Infinity, Infinity, 3, Infinity, Infinity, Infinity, 5], [Infinity, Infinity, Infinity, 0, Infinity, Infinity, 5, Infinity], [Infinity, 5, Infinity, Infinity, Infinity, 5, Infinity, Infinity], [Infinity, 4, 5, Infinity, 5, Infinity, Infinity, Infinity]]
  44. the shortest-path problem

  45. http://www-m3.ma.tum.de/foswiki/pub/MN0506/WebHome/dijkstra.pdf

  46. function Dijkstra(Graph, source): create vertex set Q for each vertex

    v in Graph: dist[v] ← INFINITY prev[v] ← UNDEFINED add v to Q dist[source] ← 0 while Q is not empty: u ← vertex in Q with min dist[u] remove u from Q for each neighbor v of u: alt ← dist[u] + length(u, v) if alt < dist[v]: dist[v]← alt prev[v]← u return dist[], prev[]
  47. export default function dijkstra(graph, startVertex) { const distances = {};

    const visitedVertices = {}; const previousVertices = {}; const queue = new PriorityQueue(); return { distances, previousVertices, }; }
  48. graph.getAllVertices().forEach((vertex) => { distances[vertex.getKey()] = Infinity; previousVertices[vertex.getKey()] = null; });

    distances[startVertex.getKey()] = 0; queue.add(startVertex, distances[startVertex.getKey()]); while (!queue.isEmpty()) { // loop implementation }
  49. const currentVertex = queue.poll(); currentVertex.getNeighbors().forEach((neighbor) => { if (!visitedVertices[neighbor.getKey()]) {

    const edge = graph.findEdge(currentVertex, neighbor); const existingDistanceToNeighbor = distances[neighbor.getKey()]; const distanceToNeighborFromCurrent = distances[currentVertex.getKey()] + edge.weight; // second block }); visitedVertices[currentVertex.getKey()] = currentVertex;
  50. currentVertex.getNeighbors().forEach((neighbor) => { if (!visitedVertices[neighbor.getKey()]) { // first block if

    (distanceToNeighborFromCurrent < existingDistanceToNeighbor) { distances[neighbor.getKey()] = distanceToNeighborFromCurrent; if (queue.hasValue(neighbor)) { queue.changePriority(neighbor, distances[neighbor.getKey()]); } previousVertices[neighbor.getKey()] = currentVertex; } if (!queue.hasValue(neighbor)) { queue.add(neighbor, distances[neighbor.getKey()]); } } }); visitedVertices[currentVertex.getKey()] = currentVertex;
  51. P O T K B S P C Weights omitted

    for readability purposes.
  52. { pikachu: 0, odyssey: 4, kart: 5, charmander: 9, party:

    12, tennis: 9, bulbassaur: 9, snorlax: 8 } { pikachu: 0, odyssey: 4, kart: 5, charmander: 9, party: 12, tennis: 9, bulbassaur: 9, snorlax: 8 } { pikachu: 0, odyssey: 4, kart: 5, charmander: 9, party: 12, tennis: 9, bulbassaur: 9, snorlax: 8 }
  53. is this scalable?

  54. https://pt.slideshare.net/planetcassandra/e-bay-nyc

  55. https://medium.com/pinterest-engineering/an-update-on-pixie-pinterests-recommendation-system-6f273f737e1b

  56. production-ready alternatives

  57. None
  58. ethical concerns

  59. addiction

  60. privacy

  61. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.304.5839&rep=rep1&type=pdf

  62. thank you! https://olarclara.github.io