In questo intervento ho trattato l'utilizzo di strumenti e API per automatizzare attività quotidiane di analisi e monitoraggio.
View Slide
▪▪▪
My First Headinglink1link2testo/html/body/a[1]/@href
/axis::node-test[predicate]/axis::node-test[predicate]/axis::node-test[predicate]▪→→→/locationstep/locationstep/locationstep
/html/body/a[1]/@href/child::html/child::body/child::a[1]/attribute::href
▪esempio di contenuto▪esempio di contenuto▪//nome-tag/@attributo
▪→→▪→ contains(str1, str2)→ starts-with(str1, str2)/html/body/a[1]/@href//a[1]/@href//a[contains(@href, "link1.html")]//a[starts-with(@href, "link1")]
▪//*[@id="menu-item-5015"]/a▪//ul[@id="menu-primary-items"]/li/a
=IMPORTXML(url, xpath_query)▪▪
=XPathOnUrl(url, xpath, attribute, xmlHTTPSettings, mode)▪▪▪
▪▪
"""Scrape the primary-menu link texts from every URL listed in urls.txt
and append one "url,texts" line per page to results.txt."""
from lxml import html
import requests

# Context managers guarantee both files are closed even if a request fails
# (the original opened them manually and never closed urls.txt).
with open("urls.txt", "r") as urls, open("results.txt", "a+") as results_file:
    for item in urls:
        url = item.rstrip("\n")
        page = requests.get(url)
        tree = html.fromstring(page.content)
        # All anchor texts of the primary navigation menu.
        text = tree.xpath('//ul[@id="menu-primary-items"]/li/a/text()')
        results_file.write("%s,%s\n" % (url, text))
        print ("SCRAPING " + url)
        print (text, "\n")
//title//meta[@name="description"]/@content//link[@hreflang="it-IT"]/@href//link[contains(@hreflang, *)]/@href//link[@rel="canonical"]/@href//meta[@name="robots"]/@content//h1//url/loc/text()
▪→→→▪
https://www.googleapis.com/analytics/v3/data/ga?ids=ga:XXXX&start-date=2019-01-01&end-date=2019-03-31&metrics=ga:sessions&filters=ga:country==Italy&access_token=XXXXhttps://www.googleapis.com/analytics/v3/data/ga?ids=ga:XXXX&start-date=2019-01-01&end-date=2019-03-31&metrics=ga:sessions&filters=ga:country==Italy&access_token=XXXX▪▪▪
https://www.googleapis.com/analytics/v3/data/ga?ids=ga:XXXX&start-date=2019-01-01&end-date=2019-03-31&metrics=ga:sessions&filters=ga:country==Italy
https://www.googleapis.com/analytics/v3/data/ga?ids=ga:XXXX&start-date=2019-01-01&end-date=2019-03-31&metrics=ga:sessions&filters=ga:country==Italy▪▪▪
https://www.googleapis.com/analytics/v3/data/ga?ids=ga:XXXX&start-date=2019-01-01&end-date=2019-03-31&metrics=ga:sessions&filters=ga:country==Italy▪▪▪▪
▪▪▪ga:name operator expressionga:country == Italy
▪▪→→→→
▪▪https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query{"startDate": "2019-01-01","endDate": "2019-03-31","dimensions": ["query"],"dimensionFilterGroups": [{"filters": [{"dimension": "country","operator": "equals","expression": "ITA"}]}],"aggregationType": "auto","rowLimit": 25000,"startRow": 0}
https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query{"startDate": "2019-01-01","endDate": "2019-03-31","dimensions": ["query"],"dimensionFilterGroups": [{"filters": [{"dimension": "country","operator": "equals","expression": "ITA"}]}],"aggregationType": "auto","rowLimit": 25000,"startRow": 0}
{"startDate": "2019-01-01","endDate": "2019-03-31","dimensions": ["query"],"dimensionFilterGroups": [{"filters": [{"dimension": "country","operator": "equals","expression": "ITA"}]}],"aggregationType": "auto","rowLimit": 25000,"startRow": 0}▪▪▪https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
{"startDate": "2019-01-01","endDate": "2019-03-31","dimensions": ["query"],"dimensionFilterGroups": [{"filters": [{"dimension": "country","operator": "equals","expression": "ITA"}]}],"aggregationType": "auto","rowLimit": 25000,"startRow": 0}▪▪https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
▪▪▪"dimension": string, "operator": string, "expression": string"dimension": country, "operator": equals, "expression": ITA
{"startDate": "2019-01-01","endDate": "2019-03-31","dimensions": ["query"],"dimensionFilterGroups": [{"filters": [{"dimension": "country","operator": "equals","expression": "ITA"}]}],"aggregationType": "auto","rowLimit": 25000,"startRow": 0}▪▪→→https://www.googleapis.com/webmasters/v3/sites/XXXX/searchAnalytics/query
...rowLimit = 25000retrieve_search_queries = webmasters_service.searchanalytics().query(siteUrl='ENTER-YOURS-HERE',body={"startDate": "2019-01-01","endDate": "2019-03-31","dimensions": ["query"],"dimensionFilterGroups": [{"filters": [{"dimension": "country","operator": "equals","expression": "ITA"}]}],"aggregationType": "auto","rowLimit": rowLimit}).execute()results_file = open("results.txt", "a+")for i in range(0, rowLimit):keys = retrieve_search_queries['rows'][i]['keys']impressions = retrieve_search_queries['rows'][i]['impressions']clicks = retrieve_search_queries['rows'][i]['clicks']ctr = retrieve_search_queries['rows'][i]['ctr']position = retrieve_search_queries['rows'][i]['position']print ("%s|%s|%s|%s|%s\n" % (keys, impressions, clicks, ctr, position))results_file.write ("%s|%s|%s|%s|%s\n" % (keys, impressions, clicks, ctr, position))results_file.close()
▪▪▪https://adwords.google.com/api/adwords/cm/v201809/CampaignServicexmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"xmlns:xsd="http://www.w3.org/2001/XMLSchema"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">......
▪▪▪→→
▪▪▪→→→−−→
▪▪→→
▪▪→→→
▪▪▪▪▪→→→
...def main(client, item, ad_group_id=None):# Initialize appropriate service.targeting_idea_service = client.GetService('TargetingIdeaService', version='v201809')# Construct selector object and retrieve related keywords.selector = {'ideaType': 'KEYWORD','requestType': 'STATS'}selector['requestedAttributeTypes'] = ['KEYWORD_TEXT', 'SEARCH_VOLUME']offset = 0selector['paging'] = {'startIndex': str(offset),'numberResults': str(PAGE_SIZE)}selector['searchParameters'] = [{'xsi_type': 'RelatedToQuerySearchParameter','queries': item}]# Language setting (optional).selector['searchParameters'].append({# The ID can be found in the documentation:# https://developers.google.com/adwords/api/docs/appendix/languagecodes'xsi_type': 'LanguageSearchParameter','languages': [{'id': '1004'}]})# Location setting (optional).selector['searchParameters'].append({# The ID can be found in the documentation:# https://developers.google.com/adwords/api/docs/appendix/geotargeting'xsi_type': 'LocationSearchParameter','locations': [{'id': '2380'}]})# Network search parameter (optional)selector['searchParameters'].append({'xsi_type': 'NetworkSearchParameter','networkSetting': {'targetGoogleSearch': True,'targetSearchNetwork': False,'targetContentNetwork': False,'targetPartnerSearchNetwork': False}})▪▪▪▪▪▪▪
...# Display results.if 'entries' in page:for result in page['entries']:attributes = {}for attribute in result['data']:attributes[attribute['key']] = getattr(attribute['value'], 'value', '0')results_file.write('%s|%s|%s\n' % (item, attributes['KEYWORD_TEXT'], attributes['SEARCH_VOLUME']))print ('%s|%s|%s' % (item, attributes['KEYWORD_TEXT'], attributes['SEARCH_VOLUME']))printelse:print ('No related keywords were found.')offset += PAGE_SIZEselector['paging']['startIndex'] = str(offset)more_pages = offset < int(page['totalNumEntries'])if __name__ == '__main__':# Initialize client object.adwords_client = adwords.AdWordsClient.LoadFromStorage("ABSOLUTE-PATH-TO-googleads.yaml")adwords_client.SetClientCustomerId('ENTER-YOURS-HERE')kwds = open("kwds.txt","r")#reload(sys)#sys.setdefaultencoding('utf-8')for line in kwds:item = line.strip()results_file = open("results.txt", "a+")main(adwords_client, item, int(AD_GROUP_ID) if AD_GROUP_ID.isdigit() else None)print(datetime.datetime.now())results_file.close()sleep(2)▪▪▪▪
...# Construct selector object and retrieve related keywords.selector = {'ideaType': 'KEYWORD','requestType': 'IDEAS'}selector['requestedAttributeTypes'] = ['KEYWORD_TEXT', 'SEARCH_VOLUME']offset = 0selector['paging'] = {'startIndex': str(offset),'numberResults': 10}...
"""Scrape the result links (//h3[@class="r"]/a/@href) from every SERP URL
listed in urls.txt and write one "url,hrefs" line per page to results.txt."""
from lxml import html
import requests

# Context managers guarantee both files are closed even if a request fails
# (the original opened them manually and never closed urls.txt).
with open("urls.txt", "r") as urls, open("results.txt", "w") as results_file:
    for item in urls:
        url = item.rstrip("\n")
        page = requests.get(url)
        tree = html.fromstring(page.content)
        # href of each organic result title link.
        text = tree.xpath('//h3[@class="r"]/a/@href')
        results_file.write("%s,%s\n" % (url, text))
        print ("SCRAPING " + url)
        print (text, "\n")
▪▪▪▪https://www.google.[com]/search?q=site:[dominio]&start=[#pagina]&...
▪▪▪/url?q=http://www.simpleagency.it/&sa=U&ved=0ahUKEwizuOnv1YTiAhU9GLkGHQUZAe8QFggUMAA&usg=AOvVaw2SLUR7xqI7OaMms1_bXQ3h
...#download and store new html fileos.rename('/home/giancampo/diff-html/new_html.html','/home/giancampo/diff-html/old_html.html')url = 'YOUR-HOMEPAGE-URL'response = urllib2.urlopen(url)webContent = response.read()f = open('/home/giancampo/diff-html/new_html.html', 'w')f.write(webContent)f.close()#convert html to txt fileshtml1 = open('/home/giancampo/diff-html/old_html.html').read()html2 = open('/home/giancampo/diff-html/new_html.html').read()old_file = html2text.html2text(html1)new_file = html2text.html2text(html2)#write text into txt filesold_text = open('/home/giancampo/diff-html/old_text.txt', 'w')new_text = open('/home/giancampo/diff-html/new_text.txt', 'w')old_text.write(old_file)new_text.write(new_file)old_text.close()new_text.close()...▪▪
...#send an email if the script has found differencesif filecmp.cmp('/home/giancampo/diff-html/old_text.txt','/home/giancampo/diff-html/new_text.txt') == True:print 'no emails sent'else:gmail_user = 'YOUR-GMAIL-ADDRESS'gmail_password = 'YOUR-GMAIL-PASSWORD'sent_from = gmail_userto = ['[email protected]']subject = 'Changes in the homepage!'body = _diffemail_text = '''From: %s\nTo: %s\nSubject: %s\n\n%s''' % (sent_from,', '.join(to), subject, body)server = smtplib.SMTP_SSL('smtp.gmail.com', 465)server.ehlo()server.login(gmail_user, gmail_password)server.sendmail(sent_from, to, email_text)server.close()print 'Email sent!'#files closingdiff_file.close()▪▪▪
▪▪▪→→▪
▪▪...if __name__ == '__main__':# Initialize client object.adwords_client = adwords.AdWordsClient.LoadFromStorage("C:\\Users\\gianl\\AppData\\Local\\Programs\\Python\\Python37\\_i miei script\\adwords-api\\googleads.yaml")adwords_client.SetClientCustomerId('ENTER-YOURS-HERE')kwds = open("kwds.txt","r")reload(sys)sys.setdefaultencoding('utf-8')for line in kwds:item = line.strip()results_file = open("results.txt", "a+")main(adwords_client, item, int(AD_GROUP_ID) if AD_GROUP_ID.isdigit() else None)print(datetime.datetime.now())results_file.close()sleep(2)