Whisky clustering with Apache Groovy and Apache Ignite

Slide 1

Slide 1 text

objectcomputing.com © 2018, Object Computing, Inc. (OCI). All rights reserved. No part of these notes may be reproduced, stored in a retrieval system, or transmitted, in any form or by any means, electronic, mechanical, photocopying, recording, or otherwise, without the prior, written permission of Object Computing, Inc. (OCI) Whiskey Clustering with Groovy & Apache Ignite Presented by Paul King ASF Groovy PMC & Unity Foundation ASF – Software for the public good Unity – Open systems for sharing data and benefitting underserved populations © 2023 Unity Foundation. All rights reserved. See also: https://github.com/paulk-asert/groovy-data-science unityfoundation.io Halifax, Nova Scotia, October 7-10, 2023

Slide 14

Slide 14 text

Metaprogramming // imports not shown public class Book { private String $to$string; private int $hash$code; private final List authors; private final String title; private final Date publicationDate; private static final java.util.Comparator this$TitleComparator; private static final java.util.Comparator this$PublicationDateComparator; public Book(List authors, String title, Date publicationDate) { if (authors == null) { this.authors = null; } else { if (authors instanceof Cloneable) { List authorsCopy = (List) ((ArrayList) authors).clone(); this.authors = (List) (authorsCopy instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Set ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Map ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof List ? DefaultGroovyMethods.asImmutable(authorsCopy) : DefaultGroovyMethods.asImmutable(authorsCopy)); } else { this.authors = (List) (authors instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Set ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Map ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof List ? DefaultGroovyMethods.asImmutable(authors) : DefaultGroovyMethods.asImmutable(authors)); } } this.title = title; if (publicationDate == null) { this.publicationDate = null; } else { this.publicationDate = (Date) publicationDate.clone(); } } public Book(Map args) { if ( args == null) { args = new HashMap(); } ImmutableASTTransformation.checkPropNames(this, args); if (args.containsKey("authors")) { if ( args.get("authors") == null) { this .authors = null; } else { if (args.get("authors") instanceof Cloneable) { List authorsCopy = (List) ((ArrayList) args.get("authors")).clone(); this.authors = (List) (authorsCopy instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Set ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof Map ? DefaultGroovyMethods.asImmutable(authorsCopy) : authorsCopy instanceof List ? DefaultGroovyMethods.asImmutable(authorsCopy) : DefaultGroovyMethods.asImmutable(authorsCopy)); } else { List authors = (List) args.get("authors"); this.authors = (List) (authors instanceof SortedSet ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof SortedMap ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Set ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof Map ? DefaultGroovyMethods.asImmutable(authors) : authors instanceof List ? DefaultGroovyMethods.asImmutable(authors) : DefaultGroovyMethods.asImmutable(authors)); } } } else { this .authors = null; } if (args.containsKey("title")) {this .title = (String) args.get("title"); } else { this .title = null;} if (args.containsKey("publicationDate")) { if (args.get("publicationDate") == null) { this.publicationDate = null; } else { this.publicationDate = (Date) ((Date) args.get("publicationDate")).clone(); } } else {this.publicationDate = null; } } … public Book() { this (new HashMap()); } public int compareTo(Book other) { if (this == other) { return 0; } Integer value = 0 value = this .title <=> other .title if ( value != 0) { return value } value = this .publicationDate <=> other .publicationDate if ( value != 0) { return value } return 0 } public static Comparator comparatorByTitle() { return this$TitleComparator; } public static Comparator comparatorByPublicationDate() { return this$PublicationDateComparator; } public String toString() { StringBuilder _result = new StringBuilder(); boolean $toStringFirst = true; _result.append("Book("); if ($toStringFirst) { $toStringFirst = false; } else { _result.append(", "); } _result.append(InvokerHelper.toString(this.getAuthors())); if ($toStringFirst) { $toStringFirst = false; } else { _result.append(", "); } _result.append(InvokerHelper.toString(this.getTitle())); if ($toStringFirst) { $toStringFirst = false; } else { _result.append(", "); } _result.append(InvokerHelper.toString(this.getPublicationDate())); _result.append(")"); if ($to$string == null) { $to$string = _result.toString(); } return $to$string; } public int hashCode() { if ( $hash$code == 0) { int _result = HashCodeHelper.initHash(); if (!(this.getAuthors().equals(this))) { _result = HashCodeHelper.updateHash(_result, this.getAuthors()); } if (!(this.getTitle().equals(this))) { _result = HashCodeHelper.updateHash(_result, this.getTitle()); } if (!(this.getPublicationDate().equals(this))) { _result = HashCodeHelper.updateHash(_result, this.getPublicationDate()); } $hash$code = (int) _result; } return $hash$code; } public boolean canEqual(Object other) { return other instanceof Book; } … public boolean equals(Object other) { if ( other == null) { return false; } if (this == other) { return true; } if (!( other instanceof Book)) { return false; } Book otherTyped = (Book) other; if (!(otherTyped.canEqual( this ))) { return false; } if (!(this.getAuthors() == otherTyped.getAuthors())) { return false; } if (!(this.getTitle().equals(otherTyped.getTitle()))) { return false; } if (!(this.getPublicationDate().equals(otherTyped.getPublicationDate()))) { return false; } return true; } public final Book copyWith(Map map) { if (map == null || map.size() == 0) { return this; } Boolean dirty = false; HashMap construct = new HashMap(); if (map.containsKey("authors")) { Object newValue = map.get("authors"); Object oldValue = this.getAuthors(); if (newValue != oldValue) { oldValue = newValue; dirty = true; } construct.put("authors", oldValue); } else { construct.put("authors", this.getAuthors()); } if (map.containsKey("title")) { Object newValue = map.get("title"); Object oldValue = this.getTitle(); if (newValue != oldValue) { oldValue = newValue; dirty = true; } construct.put("title", oldValue); } else { construct.put("title", this.getTitle()); } if (map.containsKey("publicationDate")) { Object newValue = map.get("publicationDate"); Object oldValue = this.getPublicationDate(); if (newValue != oldValue) { oldValue = newValue; dirty = true; } construct.put("publicationDate", oldValue); } else { construct.put("publicationDate", this.getPublicationDate()); } return dirty == true ? new Book(construct) : this; } public void writeExternal(ObjectOutput out) throws IOException { out.writeObject(authors); out.writeObject(title); out.writeObject(publicationDate); } public void readExternal(ObjectInput oin) throws IOException, ClassNotFoundException { authors = (List) oin.readObject(); title = (String) oin.readObject(); publicationDate = (Date) oin.readObject(); } … static { this$TitleComparator = new Book$TitleComparator(); this$PublicationDateComparator = new Book$PublicationDateComparator(); } public String getAuthors(int index) { return authors.get(index); } public List getAuthors() { return authors; } public final String getTitle() { return title; } public final Date getPublicationDate() { if (publicationDate == null) { return publicationDate; } else { return (Date) publicationDate.clone(); } } public int compare(java.lang.Object param0, java.lang.Object param1) { return -1; } private static class Book$TitleComparator extends AbstractComparator { public Book$TitleComparator() { } public int compare(Book arg0, Book arg1) { if (arg0 == arg1) { return 0; } if (arg0 != null && arg1 == null) { return -1; } if (arg0 == null && arg1 != null) { return 1; } return arg0.title <=> arg1.title; } public int compare(java.lang.Object param0, java.lang.Object param1) { return -1; } } private static class Book$PublicationDateComparator extends AbstractComparator { public Book$PublicationDateComparator() { } public int compare(Book arg0, Book arg1) { if ( arg0 == arg1 ) { return 0; } if ( arg0 != null && arg1 == null) { return -1; } if ( arg0 == null && arg1 != null) { return 1; } return arg0 .publicationDate <=> arg1 .publicationDate; } public int compare(java.lang.Object param0, java.lang.Object param1) { return -1; } @Immutable(copyWith = true) @Sortable(excludes = 'authors') @AutoExternalize class Book { @IndexedProperty List authors String title Date publicationDate }

Slide 41

Slide 41 text

import … def cols = ["Body", "Sweetness", "Smoky", "Medicinal", "Tobacco", "Honey", "Spicy", "Winey", "Nutty", "Malty", "Fruity", "Floral"] def numClusters = 5 def loader = new CSVLoader(file: 'whiskey.csv') def clusterer = new SimpleKMeans(numClusters: numClusters, preserveInstancesOrder: true) def instances = loader.dataSet instances.deleteAttributeAt(0) // remove RowID clusterer.buildClusterer(instances) println ' ' + cols.join(', ') def dataset = new DefaultCategoryDataset() clusterer.clusterCentroids.eachWithIndex{ Instance ctrd, num -> print "Cluster ${num+1}: " println ((1..cols.size()).collect{ sprintf '%.3f', ctrd.value(it) }.join(', ')) (1..cols.size()).each { idx -> dataset.addValue(ctrd.value(idx), "Cluster ${num+1}", cols[idx-1]) } } def clusters = (0.. clusters[cnum] << instances.get(idx).stringValue(0) } clusters.each { k, v -> println "Cluster ${k+1}:" println v.join(', ') } def plot = new SpiderWebPlot(dataset: dataset) def chart = new JFreeChart('Whiskey clusters', plot) SwingUtil.show(new ChartPanel(chart)) Whiskey – clustering with radar plot and weka Body, Sweetness, Smoky, Medicinal, Tobacco, Honey, Spicy, Winey, Nutty, Malty, Fruity, Floral Cluster 1: 3.800, 1.600, 3.600, 3.600, 0.600, 0.200, 1.600, 0.600, 1.000, 1.400, 1.200, 0.000 Cluster 2: 2.773, 2.409, 1.545, 0.045, 0.000, 1.818, 1.591, 2.000, 2.091, 2.136, 2.136, 1.591 Cluster 3: 1.773, 2.455, 1.318, 0.636, 0.000, 0.636, 1.000, 0.409, 1.636, 1.364, 1.591, 1.591 Cluster 4: 1.500, 2.233, 1.267, 0.267, 0.000, 1.533, 1.400, 0.700, 1.000, 1.900, 1.900, 2.133 Cluster 5: 2.000, 2.143, 1.857, 0.857, 1.000, 0.857, 1.714, 1.000, 1.286, 2.000, 1.429, 1.714 Cluster 1: Ardbeg, Clynelish, Lagavulin, Laphroig, Talisker Cluster 2: Aberfeldy, Aberlour, Ardmore, Auchroisk, Balmenach, BenNevis, Benrinnes, Benromach, BlairAthol, Dailuaine, Dalmore, Edradour, Glendronach, Glendullan, Glenfarclas, Glenrothes, Glenturret, Longmorn, Macallan, Mortlach, RoyalLochnagar, Strathisla Cluster 3: ArranIsleOf, Aultmore, Balblair, Cardhu, Craigganmore, Dufftown, GlenGrant, GlenKeith, GlenScotia, GlenSpey, Glenfiddich, Glenmorangie, Isle of Jura, Mannochmore, Miltonduff, Oban, Speyside, Springbank, Strathmill, Tamnavulin, Teaninich, Tomore Cluster 4: AnCnoc, Auchentoshan, Belvenie, Benriach, Bladnoch, Bowmore, Bruichladdich, Bunnahabhain, Dalwhinnie, Deanston, GlenElgin, GlenGarioch, GlenMoray, GlenOrd, Glenallachie, Glengoyne, Glenkinchie, Glenlivet, Glenlossie, Highland Park, Inchgower, Knochando, Linkwood, Loch Lomond, Scapa, Speyburn, Tamdhu, Tobermory, Tomatin, Tomintoul Cluster 5: Caol Ila, Craigallechie, GlenDeveronMacduff, OldFettercairn, OldPulteney, RoyalBrackla, Tullibardine

Slide 1

Slide 1 text

Slide 2

Slide 2 text

Slide 3

Slide 3 text

Slide 4

Slide 4 text

Slide 5

Slide 5 text

Slide 6

Slide 6 text

Slide 7

Slide 7 text

Slide 8

Slide 8 text

Slide 9

Slide 9 text

Slide 10

Slide 10 text

Slide 11

Slide 11 text

Slide 12

Slide 12 text

Slide 13

Slide 13 text

Slide 14

Slide 14 text

Slide 15

Slide 15 text

Slide 16

Slide 16 text

Slide 17

Slide 17 text

Slide 18

Slide 18 text

Slide 19

Slide 19 text

Slide 20

Slide 20 text

Slide 21

Slide 21 text

Slide 22

Slide 22 text

Slide 23

Slide 23 text

Slide 24

Slide 24 text

Slide 25

Slide 25 text

Slide 26

Slide 26 text

Slide 27

Slide 27 text

Slide 28

Slide 28 text

Slide 29

Slide 29 text

Slide 30

Slide 30 text

Slide 31

Slide 31 text

Slide 32

Slide 32 text

Slide 33

Slide 33 text

Slide 34

Slide 34 text

Slide 35

Slide 35 text

Slide 36

Slide 36 text

Slide 37

Slide 37 text

Slide 38

Slide 38 text

Slide 39

Slide 39 text

Slide 40

Slide 40 text