Best Practices for Python Scripting

Best Practices for Python Scripting Greg Back PyOhio 2015 @gtback
/gtback

The Problem Find the 10 words that occur most frequently
in the Gettysburg Address.

Let’s play in the REPL Python 2.7.10 (default, Jun 10
2015, 19:42:47) [GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> with open('gettysburg.txt') as f: ... data = f.read() ... >>> import collections >>> words = collections.defaultdict(int) >>> for word in data.split(): ... word = word.strip('-,.').lower() ... words[word] += 1 ... >>> import operator >>> word_freqs = sorted(words.items(), key=operator.itemgetter(1), reverse=True) >>> with open('results.txt', 'w') as f: ... for x in word_freqs[:10]: ... f.write(str(x) + '\n') ... print(x) ...

And the answer is... ('that', 13) ('the', 11) ('we', 10)
('to', 8) ('here', 8) ('a', 7) ('nation', 5) ('not', 5) ('for', 5) ('can', 5)

Convert to a script $ cat wordfreq.py with open('gettysburg.txt') as
f: data = f.read() import collections words = collections.defaultdict(int) for word in data.split(): word = word.strip('-,.').lower() words[word] += 1 import operator word_freqs = sorted(words.items(), key=operator.itemgetter(1), reverse=True) with open('results.txt', 'w') as f: for x in word_freqs[:10]: f.write(str(x) + '\n') print(x) python wordfreq.py

#!/usr/bin/env python """ wordfreq.py - Count the frequency of words
in text. """ import collections import operator # Read text with open('gettysburg.txt') as f: data = f.read() # Collect occurrences of each word words = collections.defaultdict(int) for word in data.split(): word = word.strip('-,.').lower() words[word] += 1 # Sort the word data from most frequent to least frequent. word_freqs = sorted(words.items(), key=operator.itemgetter(1), reverse=True) # Output 10 most frequent words. with open('results.txt', 'w') as f: for x in word_freqs[:10]: f.write(str(x) + '\n') print(x) Clean up ./wordfreq.py python -c "import wordfreq"

... import collections import operator def main(): # Read text
with open('gettysburg.txt') as f: data = f.read() # Collect occurrences of each word words = collections.defaultdict(int) for word in data.split(): word = word.strip('-,.').lower() words[word] += 1 # Sort the word data from most frequent to least frequent. word_freqs = sorted(words.items(), key=operator.itemgetter(1), reverse=True) # Output 10 most frequent words. with open('results.txt', 'w') as f: for x in word_freqs[:10]: f.write(str(x) + '\n') print(x) main() Add main function

... def main(): ... # Output 10 most frequent words.
with open('results.txt', 'w') as f: for x in word_freqs[:10]: f.write(str(x) + '\n') print(x) if __name__ == '__main__': main() “Protect” main function ./wordfreq.py python -c "import wordfreq"

... def read_text(): """Read text""" ... def count_words(data): """Count occurrences
of each word""" ... def sort_words(words): """Sort the word data from most frequent to least frequent.""" ... def output_results(word_freqs): """Output 10 most frequent words.""" ... def main(): """Main wordfreq function.""" data = read_text() words = count_words(data) word_freqs = sort_words(words) output_results(word_freqs) ... Split up logic

... def output_results(word_freqs, outstream): """Output 10 most frequent words.""" for
x in word_freqs[:10]: outstream.write(str(x) + '\n') def main(): """Main wordfreq function.""" if len(sys.argv) < 2: data = sys.stdin.read() else: data = read_text(infile=sys.argv[1]) words = count_words(data) word_freqs = sort_words(words) if len(sys.argv) < 3: output_results(word_freqs, outstream=sys.stdout) else: with open(sys.argv[2], 'w') as outfile: output_results(word_freqs, outstream=outfile) ... Parameterize input and output files ./wordfreq.py gettysburg.txt results.txt cat alice.txt | ./wordfreq.py > alice_results.txt

import argparse ... def parse_args(): """Parse arguments from the command
line.""" parser = argparse.ArgumentParser("Count the frequency of words in text.") parser.add_argument('infile', nargs="?", help="Source text. Default: STDIN") parser.add_argument('outfile', nargs="?", help="Result destination. Default: STDOUT") return parser.parse_args() def main(): """Main wordfreq function.""" args = parse_args() if not args.infile: data = sys.stdin.read() else: data = read_text(infile=args.infile) words = count_words(data) word_freqs = sort_words(words) if not args.outfile: output_results(word_freqs, outstream=sys.stdout) else: with open(args.outfile, 'w') as outfile: output_results(word_freqs, outstream=outfile) ... Switch to using argparse ./wordfreq.py -h

... def output_results(word_freqs, outstream, count=10): """Output 10 most frequent words."""
for x in word_freqs[:count]: outstream.write(str(x) + '\n') def parse_args(): """Parse arguments from the command line.""" parser = argparse.ArgumentParser("Count the frequency of words in text.") parser.add_argument('-c', '--count', type=int, default=10, help="Number of words to report. Default: 10") parser.add_argument('infile', nargs="?", help="Source text. Default: STDIN") parser.add_argument('outfile', nargs="?", help="Result destination. Default: STDOUT") return parser.parse_args() def main(): """Main wordfreq function.""" ... if not args.outfile: output_results(word_freqs, outstream=sys.stdout, count=args.count) else: with open(args.outfile, 'w') as outfile: output_results(word_freqs, outstream=outfile, count=args.count) ... Parameterize number of words ./wordfreq.py -c 20 gettysburg.txt

import os ... def parse_envvars(): """Get options from environment variables."""
return { 'count': os.environ.get('WF_COUNT'), 'infile': os.environ.get('WF_INFILE'), 'outfile': os.environ.get('WF_OUTFILE'), } ... Environment Variables (1/2) WF_COUNT=20 ./wordfreq.py gettysburg.txt

... def wordfreq(count=10, infile=None, outfile=None): if not infile: data =
sys.stdin.read() else: data = read_text(infile=infile) words = count_words(data) word_freqs = sort_words(words) if not outfile: output_results(word_freqs, outstream=sys.stdout, count=count) else: with open(outfile, 'w') as outfile: output_results(word_freqs, outstream=outfile, count=count) ... Split functional code from options

... def main(): """Main wordfreq function.""" args = parse_args() env
= parse_envvars() count = int(args.count or env['count']) or 10 infile = args.infile or env['infile'] or None outfile = args.outfile or env['outfile'] or None wordfreq(count, infile, outfile) ... Environment Variables (2/2)

import ConfigParser ... def parse_config(conffile): """Get options from a config
file.""" config = ConfigParser.ConfigParser() config.read(conffile) options = {} if config.has_option('wordfreq', 'count'): options['count'] = config.getint('wordfreq', 'count') if config.has_option('wordfreq', 'infile'): options['infile'] = config.get('wordfreq', 'infile') if config.has_option('wordfreq', 'outfile'): options['outfile'] = config.get('wordfreq', 'outfile') return options def main(): ... conf = parse_config('wordfreq.conf') count = int(args.count or env['count'] or conf.get('count')) or 10 infile = args.infile or env['infile'] or conf.get('infile') or None outfile = args.outfile or env['outfile'] or conf.get('outfile') or None wordfreq(count, infile, outfile) ... Configuration File $ cat wordfreq.conf [wordfreq] count: 3 ./wordfreq.py gettysburg.txt

... def main(): ... try: wordfreq(count, infile, outfile) return 0
except: print >> sys.stderr, "An error occurred" return 1 if __name__ == '__main__': sys.exit(main()) Use return code ./wordfreq.py missing.txt

$ cat setup.py from setuptools import setup setup( name="wordfreq", author="Greg
Back", version='0.0.1', py_modules=['wordfreq'], entry_points={ 'console_scripts': ['wordfreq = wordfreq:main'] } ) Add setup.py pip install wordfreq wordfreq -c 15 gettysburg.txt python -c "import wordfreq; wordfreq.wordfreq(infile='gettysburg.txt', count=25)"

Summary 1. Convert scripts to importable modules 2. Parameterize input/output.
Permit STDIN/STDOUT. 3. Gather options from ◦ arguments ◦ environment variables ◦ config files 4. Split core logic from option parsing 5. Use proper return values 6. Make module installable

Best Practices for Python Scripting

Best Practices for Python Scripting

Greg Back

More Decks by Greg Back

Other Decks in Programming

Featured

Transcript

Best Practices for Python Scripting Greg Back PyOhio 2015 @gtback

The Problem Find the 10 words that occur most frequently

Let’s play in the REPL Python 2.7.10 (default, Jun 10

And the answer is... ('that', 13) ('the', 11) ('we', 10)

Convert to a script $ cat wordfreq.py with open('gettysburg.txt') as

#!/usr/bin/env python """ wordfreq.py - Count the frequency of words

... import collections import operator def main(): # Read text

... def main(): ... # Output 10 most frequent words.

... def read_text(): """Read text""" ... def count_words(data): """Count occurrences

... def output_results(word_freqs, outstream): """Output 10 most frequent words.""" for

import argparse ... def parse_args(): """Parse arguments from the command

... def output_results(word_freqs, outstream, count=10): """Output 10 most frequent words."""

import os ... def parse_envvars(): """Get options from environment variables."""

... def wordfreq(count=10, infile=None, outfile=None): if not infile: data =

... def main(): """Main wordfreq function.""" args = parse_args() env

import ConfigParser ... def parse_config(conffile): """Get options from a config

... def main(): ... try: wordfreq(count, infile, outfile) return 0

$ cat setup.py from setuptools import setup setup( name="wordfreq", author="Greg

Summary 1. Convert scripts to importable modules 2. Parameterize input/output.