$30 off During Our Annual Pro Sale. View Details »

Automazione per la SEO: esempi pratici da un non-developer

Automazione per la SEO: esempi pratici da un non-developer

In questo intervento ho trattato l'utilizzo di strumenti e API per automatizzare attività quotidiane di analisi e monitoraggio.

Gianluca Campo

May 09, 2019
Tweet

More Decks by Gianluca Campo

Other Decks in Marketing & SEO

Transcript

  1. None
  2. None
  3. None
  4. None
  5. None
  6. None
  7. None
  8. None
  9. None
  10. None
  11. None
  12. None
  13. None
  14. None
  15. ▪ ▪ ▪

  16. <!DOCTYPE html> <html> <head> </head> <body> <h1>My First Heading</h1> <a

    href="link1.html">link1</a> <a href="link2.html">link2</a> <p>testo</p> </body> </html> /html/body/a[1]/@href
  17. /axis::node-test[predicate]/axis::node-test[predicate]/axis::node-test[predicate] ▪ → → → /locationstep/locationstep/locationstep

  18. /html/body/a[1]/@href /child::html/child::body/child::a[1]/attribute::href

  19. ▪ <nome-tag>esempio di contenuto</nome-tag> ▪ <nome-tag attributo="valore-attributo">esempio di contenuto</nome-tag> ▪

    //nome-tag/@attributo
  20. ▪ → → ▪ → contains(str1, str2) → starts-with(str1, str2)

    /html/body/a[1]/@href //a[1]/@href //a[contains(@href, "link1.html")] //a[starts-with(@href, "link1")]
  21. None
  22. ▪ //*[@id="menu-item-5015"]/a ▪ //ul[@id="menu-primary-items"]/li/a

  23. =IMPORTXML(url, xpath_query) ▪ ▪

  24. =XPathOnUrl(url, xpath, attribute, xmlHTTPSettings, mode) ▪ ▪ ▪

  25. ▪ ▪

  26. ▪ ▪ ▪

  27. ▪ ▪

  28. from lxml import html import requests urls = open("urls.txt", "r")

    results_file = open("results.txt", "a+") for item in urls: url = item.rstrip("\n") page = requests.get(url) tree = html.fromstring(page.content) text = tree.xpath('//ul[@id="menu-primary-items"]/li/a/text()') results_file.write("%s,%s\n" % (url, text)) print ("SCRAPING " + url) print (text, "\n") results_file.close() ▪ ▪ ▪ ▪ ▪
  29. //title //meta[@name="description"]/@content //link[@hreflang="it-IT"]/@href //link[contains(@hreflang, *)]/@href //link[@rel="canonical"]/@href //meta[@name="robots"]/@content //h1 //url/loc/text()

  30. None
  31. ▪ → → → ▪

  32. https://www.googleapis.com/analytics/v3/data/ga?ids=ga:XXXX&start-date=2019-01-01&end-date=2019-03-31&metrics=ga:sessions&filters=ga:country==Italy&access_token=XXXX https://www.googleapis.com/analytics/v3/data/ga ?ids=ga:XXXX

    &start-date=2019-01-01 &end-date=2019-03-31 &metrics=ga:sessions &filters=ga:country==Italy &access_token=XXXX ▪ ▪ ▪
  33. https://www.googleapis.com/analytics/v3/data/ga ?ids=ga:XXXX &start-date=2019-01-01 &end-date=2019-03-31 &metrics=ga:sessions &filters=ga:country==Italy

  34. https://www.googleapis.com/analytics/v3/data/ga ?ids=ga:XXXX &start-date=2019-01-01 &end-date=2019-03-31 &metrics=ga:sessions &filters=ga:country==Italy ▪ ▪ ▪

  35. https://www.googleapis.com/analytics/v3/data/ga ?ids=ga:XXXX &start-date=2019-01-01 &end-date=2019-03-31 &metrics=ga:sessions &filters=ga:country==Italy ▪ ▪ ▪

  36. https://www.googleapis.com/analytics/v3/data/ga ?ids=ga:XXXX &start-date=2019-01-01 &end-date=2019-03-31 &metrics=ga:sessions &filters=ga:country==Italy ▪ ▪ ▪

  37. ▪ ▪ ▪ ga:name operator expression ga:country == Italy

  38. None
  39. ▪ ▪

  40. None
  41. None
  42. ▪ ▪

  43. None
  44. ▪ ▪ → → → →

  45. ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions":

    ["query"], "dimensionFilterGroups": [ { "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 }
  46. https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups":

    [ { "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 }
  47. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  48. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  49. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  50. ▪ ▪ ▪ "dimension": string, "operator": string, "expression": string "dimension":

    country, "operator": equals, "expression": ITA
  51. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  52. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  53. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ → → https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  54. ▪ ▪ ▪

  55. ▪ ▪ ▪

  56. ▪ ▪

  57. None
  58. ▪ ▪ ▪

  59. ... rowLimit = 25000 retrieve_search_queries = webmasters_service.searchanalytics().query( siteUrl='ENTER-YOURS-HERE', body={ "startDate":

    "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ { "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": rowLimit } ).execute() results_file = open("results.txt", "a+") for i in range(0, rowLimit): keys = retrieve_search_queries['rows'][i]['keys'] impressions = retrieve_search_queries['rows'][i]['impressions'] clicks = retrieve_search_queries['rows'][i]['clicks'] ctr = retrieve_search_queries['rows'][i]['ctr'] position = retrieve_search_queries['rows'][i]['position'] print ("%s|%s|%s|%s|%s\n" % (keys, impressions, clicks, ctr, position)) results_file.write ("%s|%s|%s|%s|%s\n" % (keys, impressions, clicks, ctr, position)) results_file.close()
  60. None
  61. None
  62. ▪ ▪ ▪

  63. ▪ ▪ ▪ https://adwords.google.com/api/adwords/cm/v201809/CampaignService <?xml version="1.0"?> <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"

    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <soapenv:Header> ... </soapenv:Header> <soapenv:Body> ... </soapenv:Body> </soapenv:Envelope>
  64. ▪ ▪ ▪ → →

  65. ▪ ▪

  66. ▪ ▪ ▪ → → → − − →

  67. ▪ ▪ → →

  68. ▪ ▪ ▪

  69. ▪ ▪

  70. ▪ ▪

  71. ▪ ▪

  72. ▪ ▪

  73. ▪ ▪ → → → →

  74. ▪ ▪ → → →

  75. ▪ ▪ → →

  76. ▪ ▪ ▪

  77. None
  78. None
  79. ▪ ▪ ▪

  80. ▪ ▪ ▪

  81. ▪ ▪ ▪ ▪ ▪ → → →

  82. ... def main(client, item, ad_group_id=None): # Initialize appropriate service. targeting_idea_service

    = client.GetService( 'TargetingIdeaService', version='v201809') # Construct selector object and retrieve related keywords. selector = { 'ideaType': 'KEYWORD', 'requestType': 'STATS' } selector['requestedAttributeTypes'] = [ 'KEYWORD_TEXT', 'SEARCH_VOLUME'] offset = 0 selector['paging'] = { 'startIndex': str(offset), 'numberResults': str(PAGE_SIZE) } selector['searchParameters'] = [{ 'xsi_type': 'RelatedToQuerySearchParameter', 'queries': item }] # Language setting (optional). selector['searchParameters'].append({ # The ID can be found in the documentation: # https://developers.google.com/adwords/api/docs/appendix/languagecodes 'xsi_type': 'LanguageSearchParameter', 'languages': [{'id': '1004'}] }) # Location setting (optional). selector['searchParameters'].append({ # The ID can be found in the documentation: # https://developers.google.com/adwords/api/docs/appendix/geotargeting 'xsi_type': 'LocationSearchParameter', 'locations': [{'id': '2380'}] }) # Network search parameter (optional) selector['searchParameters'].append({ 'xsi_type': 'NetworkSearchParameter', 'networkSetting': { 'targetGoogleSearch': True, 'targetSearchNetwork': False, 'targetContentNetwork': False, 'targetPartnerSearchNetwork': False } }) ▪ ▪ ▪ ▪ ▪ ▪ ▪
  83. ... # Display results. if 'entries' in page: for result

    in page['entries']: attributes = {} for attribute in result['data']: attributes[attribute['key']] = getattr(attribute['value'], 'value', '0') results_file.write('%s|%s|%s\n' % (item, attributes['KEYWORD_TEXT'], attributes['SEARCH_VOLUME'])) print ('%s|%s|%s' % (item, attributes['KEYWORD_TEXT'], attributes['SEARCH_VOLUME'])) print else: print ('No related keywords were found.') offset += PAGE_SIZE selector['paging']['startIndex'] = str(offset) more_pages = offset < int(page['totalNumEntries']) if __name__ == '__main__': # Initialize client object. adwords_client = adwords.AdWordsClient.LoadFromStorage("ABSOLUTE-PATH-TO-googleads.yaml") adwords_client.SetClientCustomerId('ENTER-YOURS-HERE') kwds = open("kwds.txt","r") #reload(sys) #sys.setdefaultencoding('utf-8') for line in kwds: item = line.strip() results_file = open("results.txt", "a+") main(adwords_client, item, int(AD_GROUP_ID) if AD_GROUP_ID.isdigit() else None) print(datetime.datetime.now()) results_file.close() sleep(2) ▪ ▪ ▪ ▪
  84. ... # Construct selector object and retrieve related keywords. selector

    = { 'ideaType': 'KEYWORD', 'requestType': 'IDEAS' } selector['requestedAttributeTypes'] = [ 'KEYWORD_TEXT', 'SEARCH_VOLUME'] offset = 0 selector['paging'] = { 'startIndex': str(offset), 'numberResults': 10 } ...
  85. None
  86. None
  87. from lxml import html import requests urls = open("urls.txt", "r")

    results_file = open("results.txt", "w") for item in urls: url = item.rstrip("\n") page = requests.get(url) tree = html.fromstring(page.content) text = tree.xpath('//h3[@class="r"]/a/@href') results_file.write("%s,%s\n" % (url, text)) print ("SCRAPING " + url) print (text, "\n") results_file.close() ▪ ▪ ▪ ▪
  88. ▪ ▪ ▪ ▪ https://www.google.[com]/search?q=site:[dominio]&start=[#pagina]&...

  89. ▪ ▪ ▪ /url?q=http://www.simpleagency.it/&sa=U&ved=0ahUKEwizuOnv1YTiAhU9GLkGHQUZAe8QFggUMAA&usg=AOvVaw2SLUR7xqI7OaMms1_bXQ3h

  90. ... #download and store new html file os.rename('/home/giancampo/diff-html/new_html.html', '/home/giancampo/diff-html/old_html.html') url

    = 'YOUR-HOMEPAGE-URL' response = urllib2.urlopen(url) webContent = response.read() f = open('/home/giancampo/diff-html/new_html.html', 'w') f.write(webContent) f.close() #convert html to txt files html1 = open('/home/giancampo/diff-html/old_html.html').read() html2 = open('/home/giancampo/diff-html/new_html.html').read() old_file = html2text.html2text(html1) new_file = html2text.html2text(html2) #write text into txt files old_text = open('/home/giancampo/diff-html/old_text.txt', 'w') new_text = open('/home/giancampo/diff-html/new_text.txt', 'w') old_text.write(old_file) new_text.write(new_file) old_text.close() new_text.close() ... ▪ ▪
  91. ... #send an email if the script has found differences

    if filecmp.cmp('/home/giancampo/diff-html/old_text.txt', '/home/giancampo/diff-html/new_text.txt') == True: print 'no emails sent' else: gmail_user = 'YOUR-GMAIL-ADDRESS' gmail_password = 'YOUR-GMAIL-PASSWORD' sent_from = gmail_user to = ['gianluca.campo@optimize.it'] subject = 'Changes in the homepage!' body = _diff email_text = '''From: %s\nTo: %s\nSubject: %s\n\n%s''' % (sent_from, ', '.join(to), subject, body) server = smtplib.SMTP_SSL('smtp.gmail.com', 465) server.ehlo() server.login(gmail_user, gmail_password) server.sendmail(sent_from, to, email_text) server.close() print 'Email sent!' #files closing diff_file.close() ▪ ▪ ▪
  92. None
  93. None
  94. None
  95. ▪ ▪ ▪ → → ▪

  96. None
  97. None
  98. None
  99. None
  100. None
  101. None
  102. None
  103. None
  104. None
  105. None
  106. None
  107. None
  108. None
  109. ▪ ▪

  110. ▪ ▪ ... if __name__ == '__main__': # Initialize client

    object. adwords_client = adwords.AdWordsClient.LoadFromStorage("C:\\Users\\gianl\\AppData\\Local\\Programs\\Python\\Python37\\_i miei script\\adwords-api\\googleads.yaml") adwords_client.SetClientCustomerId('ENTER-YOURS-HERE') kwds = open("kwds.txt","r") reload(sys) sys.setdefaultencoding('utf-8') for line in kwds: item = line.strip() results_file = open("results.txt", "a+") main(adwords_client, item, int(AD_GROUP_ID) if AD_GROUP_ID.isdigit() else None) print(datetime.datetime.now()) results_file.close() sleep(2)