Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Crunching data with go: Tips, tricks, use-cases

Crunching data with go: Tips, tricks, use-cases

Talk for the first meetup of the Munich Golang User Group. It describes use cases from real Go development, covering fetching data from an SQL database, connecting to Google services like Google Analytics and Google BigQuery, and other aspects of building a geolocation application.

Sergii Khomenko

April 24, 2014
Tweet

More Decks by Sergii Khomenko

Other Decks in Programming

Transcript

  1. ! Crunching data with go: Tips, tricks, use-cases

    Sergii Khomenko, Data Scientist, STYLIGHT sergii.khomenko@stylight.com @lc0d3r MUNICH GOPHERS - APR 24 2014, MUNICH
  2. Agenda Relational databases ! Google Analytics and BigQuery ! Geolocation

    ! Useful things from Go-world WHAT IT’S ABOUT
  3. • github.com/jmoiron/sqlx type Clickout struct {! ! Id, Count int!

    ! Ip string! ! Type int! ! Commision, Eu_commission float32! }
  4. ! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s? parseTime=true", config.Database.Username,! ! !

    config.Database.Password, config.Database.Protocol, config.Database.Server, config.Database.Database))! ! ! fmt.Printf("Connect to %s:(%s)... \n", config.Database.Protocol, config.Database.Server)! ! if err != nil {! ! ! log.Fatalf("Can not connect to the mysql server - %s", err)! ! ! return! ! }! ! defer db.Close()! ! !
  5. ! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end": arguments["<to>"].(string)

    + " 23:59:59"}! ! geoParams := paramStruct{}! ! ! siteStr, _ := arguments["--site"].(string)! ! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {! ! ! dbParams["site"] = siteInt! ! }! ! ! query := getClickoutsQuery(dbParams)! ! rows, err := db.Queryx(query)
  6. ! if err == nil {! ! ! for rows.Next()

    {! ! ! ! click := Clickout{}! ! ! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! ! ! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }!
  7. ! task := make(chan Clickout)! ! result := make(chan IpResult)!

    ! done = make(chan interface{})! ! ! go processChannel(task, result)! ! go aggregateResults(result, &results)! ! ! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! ! ! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! ! ! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }
  8. func processChannel(tc chan Clickout, rc chan IpResult) {! ! for

    click := range tc {! ! ! if subnet, err := findNetwork(click.Ip); err == nil {! ! ! ! rc <- IpResult{click, subnet}! ! ! } else {! ! ! ! rc <- IpResult{click, new(IpSubnet)}! ! ! }! ! }! ! close(rc)! }!
  9. func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {! ! results :=

    *rs! ! found, notFound := 0, 0! ! ! for result := range rc {! ! ! if result.Subnet.startInt == 0 {! ! ! ! notFound += result.click.Count! ! ! ! log.Printf("Can not find ip %s\n", result.click.Ip)! ! ! } else {! ! ! ! found += result.click.Count! ! ! ! log.Printf("%s is {%s - %s} \n", result.click.Ip,! ! ! ! ! result.Subnet.startIp, result.Subnet.endIp)! ! ! ! ! AddResult(&results, result)! ! ! }! ! }! ! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %d\n", float32(found)/float32(found+notFound),! ! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)! ! ! close(done)! }! !
  10. package main! ! import (! ! "fmt"! ! "runtime"! )!

    ! func main() {! ! ! fmt.Printf("GOMAXPROCS is %d %d %d\n", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! ! ! runtime.GOMAXPROCS(runtime.NumCPU())! ! fmt.Printf("GOMAXPROCS is %d %d %d\n", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! ! }!
  11. ! db, err := geoip2.Open("data/GeoLite2-City.mmdb")! ! if err != nil

    {! ! ! panic(err)! ! }! ! ! ! ip := net.ParseIP("81.2.69.142")! ! record, err := db.City(ip)! ! if err != nil {! ! ! panic(err)! ! }! ! ! fmt.Printf("Portuguese (BR) city name: %v\n", record.City.Names["pt-BR"])! ! fmt.Printf("English subdivision name: %v\n", record.Subdivisions[0].Names["en"])! ! fmt.Printf("Russian country name: %v\n", record.Country.Names["ru"])! ! fmt.Printf("ISO country code: %v\n", record.Country.IsoCode)! ! fmt.Printf("Time zone: %v\n", record.Location.TimeZone)! ! fmt.Printf("Coordinates: %v, %v\n", record.Location.Latitude, record.Location.Longitude)! ! ! db.Close()
  12. var config = &oauth.Config{! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! ClientSecret: "client-secret-here",!

    ! Scope: "https://www.googleapis.com/auth/analytics.readonly",! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! }
  13. ! oauthHttpClient := getOAuthClient(config)! ! analyticsService, err := analytics.New(oauthHttpClient)! !

    if err != nil {! ! ! log.Fatal("Failed to create GA service")! ! }! ! ! dataService := analytics.NewDataGaService(analyticsService)! ! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
  14. ! data, err := dataGaGetCall.Do()! ! if err != nil

    {! ! ! log.Fatal("Failed fetch data from GA")! ! }! ! ! return data.Rows
  15. func main() {! ! gaOptions := map[string]string{! ! ! "dimensions":

    "ga:region,ga:city",! ! ! "sort": "-ga:visits",! ! ! "limit": "10",! ! }! ! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", ! "ga:visits", gaOptions)! ! ! for row := 0; row <= len(rows)-1; row++ {! ! ! fmt.Printf("row=%d %v\n", row, rows[row])! ! }! }
  16. ! config := &oauth.Config{! ! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! !

    ClientSecret: "client-secret-here",! ! ! Scope: bigquery.BigqueryScope,! ! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! ! }! ! ! transport := &oauth.Transport{! ! ! Token: token,! ! ! Config: config,! ! }! ! client := transport.Client()
  17. ! service, err := bigquery.New(client)! ! if err != nil

    {! ! ! panic(err)! ! }! ! ! datasetList, err := service.Datasets.List("testing-project").Do()! ! if err != nil {! ! ! panic(err)! ! }! ! ! for _, d := range datasetList.Datasets {! ! ! fmt.Println(d.FriendlyName)! ! }!
  18. Interesting Gophers • Golang machine learning lib 
 https://github.com/xlvector/hector •

    Logistic Regression • Factorized Machine • CART, Random Forest, Random Decision Tree, Gradient Boosting Decision Tree • Neural Network
  19. Interesting Gophers • library for numeric operation
 https://github.com/gonum - fairly,

    but they are working to bring some useful packages • matrix - Scientific math package for the Go language. • graph - Discrete math structures and functions
  20. Reference list • Why are ‘Cool Kids’ at Github Moving

    to GO Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/ • How suitable Go will be for scientific computing? - https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
  21. Thank you! MUNICH GOPHERS - APR 24 2014, MUNICH
  22. MUNICH GOPHERS - APR 24 2014, MUNICH

    Sergii Khomenko, Data Scientist STYLIGHT GmbH sergii.khomenko@stylight.com @lc0d3r ! STYLIGHT.COM