Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Automazione per la SEO: esempi pratici da un non-developer

Automazione per la SEO: esempi pratici da un non-developer

In questo intervento ho trattato l'utilizzo di strumenti e API per automatizzare attività quotidiane di analisi e monitoraggio.

Gianluca Campo

May 09, 2019
Tweet

More Decks by Gianluca Campo

Other Decks in Marketing & SEO

Transcript

  1. <!DOCTYPE html> <html> <head> </head> <body> <h1>My First Heading</h1> <a

    href="link1.html">link1</a> <a href="link2.html">link2</a> <p>testo</p> </body> </html> /html/body/a[1]/@href
  2. ▪ → → ▪ → contains(str1, str2) → starts-with(str1, str2)

    /html/body/a[1]/@href //a[1]/@href //a[contains(@href, "link1.html")] //a[starts-with(@href, "link1")]
  3. from lxml import html import requests urls = open("urls.txt", "r")

    results_file = open("results.txt", "a+") for item in urls: url = item.rstrip("\n") page = requests.get(url) tree = html.fromstring(page.content) text = tree.xpath('//ul[@id="menu-primary-items"]/li/a/text()') results_file.write("%s,%s\n" % (url, text)) print ("SCRAPING " + url) print (text, "\n") results_file.close() ▪ ▪ ▪ ▪ ▪
  4. ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions":

    ["query"], "dimensionFilterGroups": [ { "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 }
  5. https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups":

    [ { "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 }
  6. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  7. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  8. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  9. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  10. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  11. { "startDate": "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ {

    "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": 25000, "startRow": 0 } ▪ ▪ → → https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
  12. ... rowLimit = 25000 retrieve_search_queries = webmasters_service.searchanalytics().query( siteUrl='ENTER-YOURS-HERE', body={ "startDate":

    "2019-01-01", "endDate": "2019-03-31", "dimensions": ["query"], "dimensionFilterGroups": [ { "filters": [ { "dimension": "country", "operator": "equals", "expression": "ITA" } ] } ], "aggregationType": "auto", "rowLimit": rowLimit } ).execute() results_file = open("results.txt", "a+") for i in range(0, rowLimit): keys = retrieve_search_queries['rows'][i]['keys'] impressions = retrieve_search_queries['rows'][i]['impressions'] clicks = retrieve_search_queries['rows'][i]['clicks'] ctr = retrieve_search_queries['rows'][i]['ctr'] position = retrieve_search_queries['rows'][i]['position'] print ("%s|%s|%s|%s|%s\n" % (keys, impressions, clicks, ctr, position)) results_file.write ("%s|%s|%s|%s|%s\n" % (keys, impressions, clicks, ctr, position)) results_file.close()
  13. ▪ ▪ ▪ https://adwords.google.com/api/adwords/cm/v201809/CampaignService <?xml version="1.0"?> <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"

    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <soapenv:Header> ... </soapenv:Header> <soapenv:Body> ... </soapenv:Body> </soapenv:Envelope>
  14. ... def main(client, item, ad_group_id=None): # Initialize appropriate service. targeting_idea_service

    = client.GetService( 'TargetingIdeaService', version='v201809') # Construct selector object and retrieve related keywords. selector = { 'ideaType': 'KEYWORD', 'requestType': 'STATS' } selector['requestedAttributeTypes'] = [ 'KEYWORD_TEXT', 'SEARCH_VOLUME'] offset = 0 selector['paging'] = { 'startIndex': str(offset), 'numberResults': str(PAGE_SIZE) } selector['searchParameters'] = [{ 'xsi_type': 'RelatedToQuerySearchParameter', 'queries': item }] # Language setting (optional). selector['searchParameters'].append({ # The ID can be found in the documentation: # https://developers.google.com/adwords/api/docs/appendix/languagecodes 'xsi_type': 'LanguageSearchParameter', 'languages': [{'id': '1004'}] }) # Location setting (optional). selector['searchParameters'].append({ # The ID can be found in the documentation: # https://developers.google.com/adwords/api/docs/appendix/geotargeting 'xsi_type': 'LocationSearchParameter', 'locations': [{'id': '2380'}] }) # Network search parameter (optional) selector['searchParameters'].append({ 'xsi_type': 'NetworkSearchParameter', 'networkSetting': { 'targetGoogleSearch': True, 'targetSearchNetwork': False, 'targetContentNetwork': False, 'targetPartnerSearchNetwork': False } }) ▪ ▪ ▪ ▪ ▪ ▪ ▪
  15. ... # Display results. if 'entries' in page: for result

    in page['entries']: attributes = {} for attribute in result['data']: attributes[attribute['key']] = getattr(attribute['value'], 'value', '0') results_file.write('%s|%s|%s\n' % (item, attributes['KEYWORD_TEXT'], attributes['SEARCH_VOLUME'])) print ('%s|%s|%s' % (item, attributes['KEYWORD_TEXT'], attributes['SEARCH_VOLUME'])) print else: print ('No related keywords were found.') offset += PAGE_SIZE selector['paging']['startIndex'] = str(offset) more_pages = offset < int(page['totalNumEntries']) if __name__ == '__main__': # Initialize client object. adwords_client = adwords.AdWordsClient.LoadFromStorage("ABSOLUTE-PATH-TO-googleads.yaml") adwords_client.SetClientCustomerId('ENTER-YOURS-HERE') kwds = open("kwds.txt","r") #reload(sys) #sys.setdefaultencoding('utf-8') for line in kwds: item = line.strip() results_file = open("results.txt", "a+") main(adwords_client, item, int(AD_GROUP_ID) if AD_GROUP_ID.isdigit() else None) print(datetime.datetime.now()) results_file.close() sleep(2) ▪ ▪ ▪ ▪
  16. ... # Construct selector object and retrieve related keywords. selector

    = { 'ideaType': 'KEYWORD', 'requestType': 'IDEAS' } selector['requestedAttributeTypes'] = [ 'KEYWORD_TEXT', 'SEARCH_VOLUME'] offset = 0 selector['paging'] = { 'startIndex': str(offset), 'numberResults': 10 } ...
  17. from lxml import html import requests urls = open("urls.txt", "r")

    results_file = open("results.txt", "w") for item in urls: url = item.rstrip("\n") page = requests.get(url) tree = html.fromstring(page.content) text = tree.xpath('//h3[@class="r"]/a/@href') results_file.write("%s,%s\n" % (url, text)) print ("SCRAPING " + url) print (text, "\n") results_file.close() ▪ ▪ ▪ ▪
  18. ... #download and store new html file os.rename('/home/giancampo/diff-html/new_html.html', '/home/giancampo/diff-html/old_html.html') url

    = 'YOUR-HOMEPAGE-URL' response = urllib2.urlopen(url) webContent = response.read() f = open('/home/giancampo/diff-html/new_html.html', 'w') f.write(webContent) f.close() #convert html to txt files html1 = open('/home/giancampo/diff-html/old_html.html').read() html2 = open('/home/giancampo/diff-html/new_html.html').read() old_file = html2text.html2text(html1) new_file = html2text.html2text(html2) #write text into txt files old_text = open('/home/giancampo/diff-html/old_text.txt', 'w') new_text = open('/home/giancampo/diff-html/new_text.txt', 'w') old_text.write(old_file) new_text.write(new_file) old_text.close() new_text.close() ... ▪ ▪
  19. ... #send an email if the script has found differences

    if filecmp.cmp('/home/giancampo/diff-html/old_text.txt', '/home/giancampo/diff-html/new_text.txt') == True: print 'no emails sent' else: gmail_user = 'YOUR-GMAIL-ADDRESS' gmail_password = 'YOUR-GMAIL-PASSWORD' sent_from = gmail_user to = ['[email protected]'] subject = 'Changes in the homepage!' body = _diff email_text = '''From: %s\nTo: %s\nSubject: %s\n\n%s''' % (sent_from, ', '.join(to), subject, body) server = smtplib.SMTP_SSL('smtp.gmail.com', 465) server.ehlo() server.login(gmail_user, gmail_password) server.sendmail(sent_from, to, email_text) server.close() print 'Email sent!' #files closing diff_file.close() ▪ ▪ ▪
  20. ▪ ▪ ... if __name__ == '__main__': # Initialize client

    object. adwords_client = adwords.AdWordsClient.LoadFromStorage("C:\\Users\\gianl\\AppData\\Local\\Programs\\Python\\Python37\\_i miei script\\adwords-api\\googleads.yaml") adwords_client.SetClientCustomerId('ENTER-YOURS-HERE') kwds = open("kwds.txt","r") reload(sys) sys.setdefaultencoding('utf-8') for line in kwds: item = line.strip() results_file = open("results.txt", "a+") main(adwords_client, item, int(AD_GROUP_ID) if AD_GROUP_ID.isdigit() else None) print(datetime.datetime.now()) results_file.close() sleep(2)