Whiskey Groovy Ignite - Speaker Deck

Slide 1

Slide 1 text

June 6, 2023 1 Whiskey Clustering with Apache Groovy & Apache Ignite Paul King VP AT APACHE GROOVY, PRINCIPAL SOFTWARE ENGINEER AT UNITY FOUNDATION

Slide 18

Slide 18 text

Metaprogramming // imports not shown public class Book { private String $to$string; private int $hash$code; private final List authors; private final String title; private final Date publicationDate; private static final java.util.Comparator this$TitleComparator; private static final java.util.Comparator this$PublicationDateComparator; public Book(List authors, String title, Date publicationDate) { if (authors == null) { this.authors = null; } else { if (authors instanceof Cloneable) { List authorsCopy = (List) ((ArrayList) authors).clone(); this.authors = (List) (authorsCopy instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Set ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Map ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof List ? DefaultGroovyMethods.asImmutable(authorsCopy) : DefaultGroovyMethods.asImmutable(authorsCopy)); } else { this.authors = (List) (authors instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Set ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Map ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof List ? DefaultGroovyMethods.asImmutable(authors) : DefaultGroovyMethods.asImmutable(authors)); } } this.title= title; if (publicationDate== null) { this.publicationDate= null; } else { this.publicationDate= (Date) publicationDate.clone(); } } public Book(Map args) { if ( args == null) { args = new HashMap(); } ImmutableASTTransformation.checkPropNames(this, args); if (args.containsKey("authors")) { if ( args.get("authors") == null) { this .authors = null; } else { if (args.get("authors") instanceof Cloneable) { List authorsCopy = (List) ((ArrayList) args.get("authors")).clone(); this.authors = (List) (authorsCopy instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Set ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Map ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof List ? DefaultGroovyMethods.asImmutable(authorsCopy) : DefaultGroovyMethods.asImmutable(authorsCopy)); } else { List authors = (List) args.get("authors"); this.authors = (List) (authors instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Set ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Map ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof List ? DefaultGroovyMethods.asImmutable(authors) : DefaultGroovyMethods.asImmutable(authors)); } } } else { this .authors = null; } if (args.containsKey("title")) {this .title = (String) args.get("title"); } else { this .title = null;} if (args.containsKey("publicationDate")) { if (args.get("publicationDate") == null) { this.publicationDate = null; } else { this.publicationDate = (Date) ((Date) args.get("publicationDate")).clone(); } } else {this.publicationDate = null; } } … public Book() { this (new HashMap()); } public int compareTo(Book other) { if (this == other) { return 0; } Integer value = 0 value = this .title <=> other .title if ( value != 0) { return value } value = this .publicationDate <=> other .publicationDate if ( value != 0) { return value } return 0 } public static Comparator comparatorByTitle() { return this$TitleComparator; } public static Comparator comparatorByPublicationDate() { return this$PublicationDateComparator; } public String toString() { StringBuilder _result = new StringBuilder(); boolean $toStringFirst= true; _result.append("Book("); if ($toStringFirst) { $toStringFirst = false; } else { _result.append(", "); } _result.append(InvokerHelper.toString(this.getAuthors())); if ($toStringFirst) { $toStringFirst = false; } else { _result.append(", "); } _result.append(InvokerHelper.toString(this.getTitle())); if ($toStringFirst) { $toStringFirst = false; } else { _result.append(", "); } _result.append(InvokerHelper.toString(this.getPublicationDate())); _result.append(")"); if ($to$string == null) { $to$string = _result.toString(); } return $to$string; } public int hashCode() { if ( $hash$code == 0) { int _result = HashCodeHelper.initHash(); if (!(this.getAuthors().equals(this))) { _result = HashCodeHelper.updateHash(_result, this.getAuthors()); } if (!(this.getTitle().equals(this))) { _result = HashCodeHelper.updateHash(_result, this.getTitle()); } if (!(this.getPublicationDate().equals(this))) { _result = HashCodeHelper.updateHash(_result, this.getPublicationDate()); } $hash$code = (int) _result; } return $hash$code; } public boolean canEqual(Object other) { return other instanceof Book; } … public boolean equals(Object other) { if ( other == null) { return false; } if (this == other) { return true; } if (!( other instanceof Book)) { return false; } Book otherTyped = (Book) other; if (!(otherTyped.canEqual( this ))) { return false; } if (!(this.getAuthors() == otherTyped.getAuthors())) { return false; } if (!(this.getTitle().equals(otherTyped.getTitle()))) { return false; } if (!(this.getPublicationDate().equals(otherTyped.getPublicationDate()))) { return false; } return true; } public final Book copyWith(Map map) { if (map == null || map.size() == 0) { return this; } Boolean dirty = false; HashMap construct = new HashMap(); if (map.containsKey("authors")) { Object newValue = map.get("authors"); Object oldValue = this.getAuthors(); if (newValue != oldValue) { oldValue= newValue; dirty = true; } construct.put("authors", oldValue); } else { construct.put("authors", this.getAuthors()); } if (map.containsKey("title")) { Object newValue = map.get("title"); Object oldValue = this.getTitle(); if (newValue != oldValue) { oldValue= newValue; dirty = true; } construct.put("title", oldValue); } else { construct.put("title", this.getTitle()); } if (map.containsKey("publicationDate")) { Object newValue = map.get("publicationDate"); Object oldValue = this.getPublicationDate(); if (newValue != oldValue) { oldValue= newValue; dirty = true; } construct.put("publicationDate", oldValue); } else { construct.put("publicationDate", this.getPublicationDate()); } return dirty == true ? new Book(construct) : this; } public void writeExternal(ObjectOutputout) throws IOException { out.writeObject(authors); out.writeObject(title); out.writeObject(publicationDate); } public void readExternal(ObjectInputoin) throws IOException, ClassNotFoundException{ authors = (List) oin.readObject(); title = (String) oin.readObject(); publicationDate= (Date) oin.readObject(); } … static { this$TitleComparator = new Book$TitleComparator(); this$PublicationDateComparator = new Book$PublicationDateComparator(); } public String getAuthors(int index) { return authors.get(index); } public List getAuthors() { return authors; } public final String getTitle() { return title; } public final Date getPublicationDate() { if (publicationDate== null) { return publicationDate; } else { return (Date) publicationDate.clone(); } } public int compare(java.lang.Objectparam0, java.lang.Objectparam1) { return -1; } private static class Book$TitleComparator extends AbstractComparator { public Book$TitleComparator() { } public int compare(Book arg0, Book arg1) { if (arg0 == arg1) { return 0; } if (arg0 != null && arg1 == null) { return -1; } if (arg0 == null && arg1 != null) { return 1; } return arg0.title <=> arg1.title; } public int compare(java.lang.Objectparam0, java.lang.Objectparam1) { return -1; } } private static class Book$PublicationDateComparator extends AbstractComparator { public Book$PublicationDateComparator() { } public int compare(Book arg0, Book arg1) { if ( arg0 == arg1 ) { return 0; } if ( arg0 != null && arg1 == null) { return -1; } if ( arg0 == null && arg1 != null) { return 1; } return arg0 .publicationDate <=> arg1 .publicationDate; } public int compare(java.lang.Objectparam0, java.lang.Objectparam1) { return -1; } } } @Immutable(copyWith = true) @Sortable(excludes = 'authors') @AutoExternalize class Book { @IndexedProperty List authors String title Date publicationDate }

Slide 46

Slide 46 text

import … def cols = ["Body", "Sweetness", "Smoky", "Medicinal", "Tobacco", "Honey", "Spicy", "Winey", "Nutty", "Malty", "Fruity", "Floral"] def numClusters = 5 def loader = new CSVLoader(file: 'whiskey.csv') def clusterer = new SimpleKMeans(numClusters: numClusters, preserveInstancesOrder: true) def instances = loader.dataSet instances.deleteAttributeAt(0) // remove RowID clusterer.buildClusterer(instances) println ' ' + cols.join(', ') def dataset = new DefaultCategoryDataset() clusterer.clusterCentroids.eachWithIndex{ Instance ctrd, num -> print "Cluster ${num+1}: " println ((1..cols.size()).collect{ sprintf '%.3f', ctrd.value(it) }.join(', ')) (1..cols.size()).each { idx -> dataset.addValue(ctrd.value(idx), "Cluster ${num+1}", cols[idx-1]) } } def clusters = (0.. clusters[cnum] << instances.get(idx).stringValue(0) } clusters.each { k, v -> println "Cluster ${k+1}:" println v.join(', ') } def plot = new SpiderWebPlot(dataset: dataset) def chart = new JFreeChart('Whiskey clusters', plot) SwingUtil.show(new ChartPanel(chart)) Whiskey – clustering with radar plot and weka Body, Sweetness, Smoky, Medicinal, Tobacco, Honey, Spicy, Winey, Nutty, Malty, Fruity, Floral Cluster 1: 3.800, 1.600, 3.600, 3.600, 0.600, 0.200, 1.600, 0.600, 1.000, 1.400, 1.200, 0.000 Cluster 2: 2.773, 2.409, 1.545, 0.045, 0.000, 1.818, 1.591, 2.000, 2.091, 2.136, 2.136, 1.591 Cluster 3: 1.773, 2.455, 1.318, 0.636, 0.000, 0.636, 1.000, 0.409, 1.636, 1.364, 1.591, 1.591 Cluster 4: 1.500, 2.233, 1.267, 0.267, 0.000, 1.533, 1.400, 0.700, 1.000, 1.900, 1.900, 2.133 Cluster 5: 2.000, 2.143, 1.857, 0.857, 1.000, 0.857, 1.714, 1.000, 1.286, 2.000, 1.429, 1.714 Cluster 1: Ardbeg, Clynelish, Lagavulin, Laphroig, Talisker Cluster 2: Aberfeldy, Aberlour, Ardmore, Auchroisk, Balmenach, BenNevis, Benrinnes, Benromach, BlairAthol, Dailuaine, Dalmore, Edradour, Glendronach, Glendullan, Glenfarclas, Glenrothes, Glenturret, Longmorn, Macallan, Mortlach, RoyalLochnagar, Strathisla Cluster 3: ArranIsleOf, Aultmore, Balblair, Cardhu, Craigganmore, Dufftown, GlenGrant, GlenKeith, GlenScotia, GlenSpey, Glenfiddich, Glenmorangie, Isle of Jura, Mannochmore, Miltonduff, Oban, Speyside, Springbank, Strathmill, Tamnavulin, Teaninich, Tomore Cluster 4: AnCnoc, Auchentoshan, Belvenie, Benriach, Bladnoch, Bowmore, Bruichladdich, Bunnahabhain, Dalwhinnie, Deanston, GlenElgin, GlenGarioch, GlenMoray, GlenOrd, Glenallachie, Glengoyne, Glenkinchie, Glenlivet, Glenlossie, Highland Park, Inchgower, Knochando, Linkwood, Loch Lomond, Scapa, Speyburn, Tamdhu, Tobermory, Tomatin, Tomintoul Cluster 5: Caol Ila, Craigallechie, GlenDeveronMacduff, OldFettercairn, OldPulteney, RoyalBrackla, Tullibardine

Slide 66

Slide 66 text

Scaling clustering: K-means k=3 Euclidean var dist = new EuclideanDistance() … Ignition.start(cfg).withCloseable { ignite -> println ">>> Ignite grid started for data: ${data.size()} rows X ${data[0].size()} cols" var dataCache = ignite.createCache(new CacheConfiguration( name: "TEST_${UUID.randomUUID()}", affinity: new RendezvousAffinityFunction(false, 10))) data.indices.each { int i -> dataCache.put(i, data[i]) } var trainer = new KMeansTrainer().withDistance(dist).withAmountOfClusters(3) var mdl = trainer.fit(ignite, dataCache, vectorizer) println ">>> KMeans centroids:\n${features.join(', ')}" var centroids = mdl.centers*.all() var cols = centroids.collect{ it*.get() } cols.each { c -> println c.collect(pretty).join(', ') } dataCache.destroy() } 5 … Cluster 1: Ardbeg, Caol Ila, Clynelish, Lagavulin, Laphroig, Talisker Distinguishing features: Body=3..4, Sweetness=1..2, Smoky=3..4, Medicinal=2..4, Honey=0..1, Winey=0..2, Nutty=1..2, Malty=1..2, Fruity=0..2, Floral=0..1 Cluster 2: Ardmore, ArranIsleOf, Balblair, Balmenach, BlairAthol, Bowmore, Bruichladdich, Dailuaine, Dalmore, GlenDeveronMacduff, GlenGarioch, GlenScotia, GlenSpey, Glendronach, Glenrothes, Highland Park, Isle of Jura, Loch Lomond, Mortlach, Oban, OldFettercairn, OldPulteney, Springbank, Teaninich, Tomatin, Tomore Distinguishing features: Sweetness=1..3, Smoky=1..3, Medicinal=0..2, Honey=0..2, Floral=0..2 Cluster 3: Aberfeldy, Aberlour, AnCnoc, Auchentoshan, Auchroisk, Aultmore, Belvenie, BenNevis, Benriach, Benrinnes, Benromach, Bladnoch, Bunnahabhain, Cardhu, Craigallechie, Craigganmore, Dalwhinnie, Deanston, Dufftown, Edradour, GlenElgin, GlenGrant, GlenKeith, GlenMoray, GlenOrd, Glenallachie, Glendullan, Glenfarclas, Glenfiddich, Glengoyne, Glenkinchie, Glenlivet, Glenlossie, Glenmorangie, Glenturret, Inchgower, Knochando, Linkwood, Longmorn, Macallan, Mannochmore, Miltonduff, RoyalBrackla, RoyalLochnagar, Scapa, Speyburn, Speyside, Strathisla, Strathmill, Tamdhu, Tamnavulin, Tobermory, Tomintoul, Tullibardine Distinguishing features: Smoky=0..2, Medicinal=0..1, Malty=1..3, Fruity=1..3 …

Slide 68

Slide 68 text

Scaling clustering: K-means k=3 Manhattan var dist = new ManhattanDistance() … Ignition.start(cfg).withCloseable { ignite -> println ">>> Ignite grid started for data: ${data.size()} rows X ${data[0].size()} cols" var dataCache = ignite.createCache(new CacheConfiguration( name: "TEST_${UUID.randomUUID()}", affinity: new RendezvousAffinityFunction(false, 10))) data.indices.each { int i -> dataCache.put(i, data[i]) } var trainer = new KMeansTrainer().withDistance(dist).withAmountOfClusters(3) var mdl = trainer.fit(ignite, dataCache, vectorizer) println ">>> KMeans centroids:\n${features.join(', ')}" var centroids = mdl.centers*.all() var cols = centroids.collect{ it*.get() } cols.each { c -> println c.collect(pretty).join(', ') } dataCache.destroy() } 4 3 3 + 4 = 7 … Cluster 1: Aberfeldy, Aberlour, AnCnoc, Ardmore, Auchroisk, Balmenach, Belvenie, BenNevis, Benrinnes, Benromach, BlairAthol, Bowmore, Bruichladdich, Craigallechie, Dailuaine, Dalmore, Deanston, Edradour, Glendronach, Glendullan, Glenfarclas, Glenlivet, Glenturret, Knochando, Macallan, Mortlach, OldFettercairn, RoyalLochnagar, Scapa, Strathisla, Tomatin, Tomintoul Distinguishing features: Smoky=1..3, Medicinal=0..2 Cluster 2: Ardbeg, Caol Ila, Clynelish, GlenScotia, Highland Park, Isle of Jura, Lagavulin, Laphroig, Oban, OldPulteney, Springbank, Talisker Distinguishing features: Body=2..4, Sweetness=1..2, Smoky=2..4, Honey=0..2, Winey=0..2, Nutty=1..2, Malty=1..2, Fruity=0..2, Floral=0..2 Cluster 3: ArranIsleOf, Auchentoshan, Aultmore, Balblair, Benriach, Bladnoch, Bunnahabhain, Cardhu, Craigganmore, Dalwhinnie, Dufftown, GlenDeveronMacduff, GlenElgin, GlenGarioch, GlenGrant, GlenKeith, GlenMoray, GlenOrd, GlenSpey, Glenallachie, Glenfiddich, Glengoyne, Glenkinchie, Glenlossie, Glenmorangie, Glenrothes, Inchgower, Linkwood, Loch Lomond, Longmorn, Mannochmore, Miltonduff, RoyalBrackla, Speyburn, Speyside, Strathmill, Tamdhu, Tamnavulin, Teaninich, Tobermory, Tomore, Tullibardine Distinguishing features: Medicinal=0..1, Honey=0..2, Winey=0..2 …

Slide 1

Slide 1 text

Slide 2

Slide 2 text

Slide 3

Slide 3 text

Slide 4

Slide 4 text

Slide 5

Slide 5 text

Slide 6

Slide 6 text

Slide 7

Slide 7 text

Slide 8

Slide 8 text

Slide 9

Slide 9 text

Slide 10

Slide 10 text

Slide 11

Slide 11 text

Slide 12

Slide 12 text

Slide 13

Slide 13 text

Slide 14

Slide 14 text

Slide 15

Slide 15 text

Slide 16

Slide 16 text

Slide 17

Slide 17 text

Slide 18

Slide 18 text

Slide 19

Slide 19 text

Slide 20

Slide 20 text

Slide 21

Slide 21 text

Slide 22

Slide 22 text

Slide 23

Slide 23 text

Slide 24

Slide 24 text

Slide 25

Slide 25 text

Slide 26

Slide 26 text

Slide 27

Slide 27 text

Slide 28

Slide 28 text

Slide 29

Slide 29 text

Slide 30

Slide 30 text

Slide 31

Slide 31 text

Slide 32

Slide 32 text

Slide 33

Slide 33 text

Slide 34

Slide 34 text

Slide 35

Slide 35 text

Slide 36

Slide 36 text

Slide 37

Slide 37 text

Slide 38

Slide 38 text

Slide 39

Slide 39 text

Slide 40

Slide 40 text