Python 2 is deprecated! Try Python 3 Tour
Python 2 Docs

Interactive Python 2 Tutorial ▲▼ TOC

◀{{chapter}}▶

{{t.title}}

Source at GitHub | by Chris Monson

◀{{chapter}}▶

<< {{chapter}}

RUN ▼

Clear Output Window
Clear Code Window
Show Changes
Forget Changes (this slide)
Forget ALL Changes (every slide)

Running for the first time. This will take a few seconds.

Loading Python

import sys import contextlib import StringIO # This is intended to be executed in the same environment as the script. # Among other things, it creates a doctest implementation and cleans up the # namespace from the previous run. class _testmod(object): sys = sys StringIO = StringIO def __init__(self): self.failed = 0 self.succeeded = 0 for s in self.__find_docstrings(): self.__test_doc_string(s) def __parse_doc_string(self, s): # States: # None (found >>>) -> Command [add to collected command # None (default) -> None # Command (found >>>) -> Command [add to collected command] # Command (found ...) -> Command [add to collected command] # Command (blank) -> None [store collected command, store collected result] # Command (default) -> Result [store collected command, add to collected result] # Result (found >>>) -> Command [store collected result, add to collected command] # Result (empty line) -> None [store collected result] # Result (default) -> Result [add to collected result] class Result: pass class Command: pass # Look for instances of >>> and find the text that follows (allowing ellipses). state = None commands = [] results = [] indentation = None collected_result = [] collected_command = [] lines = s.split('\n') # Add a blank at the end to make states simpler. for line in lines + ['']: ls = line.lstrip() if state is None: if ls.startswith('>>> '): indentation = line[:len(line) - len(ls)] collected_command.append(ls[4:]) state = Command elif state is Command: if ls.startswith('>>> '): if line[:len(line) - len(ls)] != indentation: raise ValueError("invalid command continuation indentation: %r" % line) collected_command.append(ls[4:].rstrip('\r\n')) elif ls.startswith('... '): if not line.startswith(indentation + '... 
'): raise ValueError("invalid command contination indentation: %r" % line) collected_command[-1] += '\n' + ls[4:].rstrip('\r\n') elif not ls: commands.append(collected_command) results.append(None) collected_command = [] collected_result = [] state = None else: if not line.startswith(indentation): raise ValueError("invalid result indentation: %v", line) commands.append(collected_command) collected_command = [] collected_result = [ls.rstrip('\r\n')] state = Result elif state is Result: if ls.startswith('>>> '): indentation = line[:len(line) - len(ls)] results.append(collected_result) collected_result = [] collected_command = [ls[4:].rstrip('\r\n')] state = Command elif not ls: results.append(collected_result) collected_result = [] state = None else: if not line.startswith(indentation): raise ValueError("invalid result indentation: %r" % line) collected_result.append(ls.rstrip('\r\n')) if len(commands) != len(results): raise ValueError("invalid doctest - different number of commands from results:\n%r\n%r" % ( commands, results)) tests = [] for c, r in zip(commands, results): # See if more than one instance of ellipses made it into any result. 
if r is not None: numellipses = sum(1 for x in r if x == '...') if numellipses > 1: raise ValueError("too many ellipsis lines in result %v", r) tests.append({'command': c, 'results': r}) return tests @contextlib.contextmanager def __redirect_stdio(self): out = self.StringIO.StringIO() err = self.StringIO.StringIO() old_out, self.sys.stdout = self.sys.stdout, out old_err, self.sys.stderr = self.sys.stderr, err try: yield old_out, old_err, out, err finally: self.sys.stdout, self.sys.stderr = old_out, old_err def __format_exc(self): def extract_tb(tb, limit=None): def getline(filename, lineno, *args): if filename.startswith('<'): return None return open(filename).readlines()[lineno-1] frame_info = [] n = 0 inTopLevel = False while tb is not None and (limit is None or n < limit): f = tb.tb_frame lineno = tb.tb_lineno co = f.f_code filename = co.co_filename name = co.co_name if not inTopLevel and name == '<module>': inTopLevel = True line = getline(filename, lineno, f.f_globals) if inTopLevel and filename == '<string>': if '__lines__' in globals(): if len(__lines__) >= lineno: line = __lines__[lineno-1] if line: line = line.strip() else: line = None frame_info.append((filename, lineno, name, line)) tb = tb.tb_next n = n+1 return frame_info def format_list(entries): formatted = [] for filename, lineno, name, line in entries: item = ' File {!r}, line {}, in {}\n'.format(filename, lineno, name) if line: item += ' {}\n'.format(line.strip()) formatted.append(item) return formatted t, e, tb = self.sys.exc_info() lastline = '{}: {}'.format(t.__name__, e.message) entries = ["Traceback (most recent call last):\n"] + format_list(extract_tb(tb)) + [lastline] return ''.join(entries) def __run_test(self, test, environ={}): command = test['command'] expected = test['results'] with self.__redirect_stdio() as (old_out, old_err, out, err): for cmdstr in command: failed = False try: result = eval(cmdstr, environ) if result is not None: print repr(result) except SyntaxError, synerr: try: 
exec cmdstr in environ except Exception: print >>err, self.__format_exc() failed = True except Exception: print >>err, self.__format_exc() failed = True finally: result = out.getvalue() + err.getvalue() if failed: break if result is None: result = '' if result.endswith('\n'): result = result[:-1] result = result.split('\n') if expected is None: expected = [] expanded_expected = expected if len(result) > len(expected) and '...' in expected: # Search for ellipses, and expand them to make matching easier. loc = expected.index('...') expanded_expected = expected[:loc] + ['...'] * (len(result) - loc - 1) + expected[loc+1:] failed = True def format_failure(command, expected, result): def format_lines(lines): out = [] for line in lines: # Some "lines" are multi-line commands (like loops). out.extend('\t{}'.format(l) for l in line.split('\n')) return out lines = ["Failed example:"] lines.extend(format_lines(command)) if not expected: lines.append("Expected nothing") else: lines.append("Expected:") lines.extend(format_lines(expected)) if not result: lines.append("Got nothing") else: lines.append("Got:") lines.extend(format_lines(result)) return '\n'.join(lines) if len(result) != len(expanded_expected): print format_failure(command, expected, result) else: for r, e in zip(result, expanded_expected): if r != e and e != '...': print format_failure(command, expected, result) break else: failed = False return not failed def __test_doc_string(self, s): tests = self.__parse_doc_string(s) environ = globals().copy() for t in tests: if self.__run_test(t, environ): self.succeeded += 1 else: self.failed += 1 print def __find_docstrings(self, vardict=None): if vardict is None: vardict = globals() if '__doc__' in vardict: yield vardict['__doc__'] for val in vardict.itervalues(): if hasattr(val, '__doc__') and val.__doc__: yield val.__doc__ if type(val) in ('type', 'classobj'): for d in self.__find_docstrings(val.__dict__): yield d # Clean up the namespace, make sure that help and 
_testmod make it where they belong. from pydoc import help __builtins__.__dict__['help'] = help __builtins__.__dict__['_testmod'] = _testmod _kill = set(vars().keys()) for _k in _kill: if _k not in ('__builtins__', '__package__', '__nam__', '__doc__', '_k'): del vars()[_k] del _k del _kill __name__ = '__main__'

"""Hello, Python! Welcome to Python, a very fun language to use and learn! Here we have a simple "Hello World!" program. All you have to do is print, and you have output. Try running it now, either by clicking *Run*, or pressing *Shift-Enter*. What happened? This tutorial contains a *Python interpreter*. It starts at the top of your program (or _script_) and does what you tell it to until it reaches the bottom. Here, we have told it to do exactly one thing: **print** a **string** (text surrounded by quotation marks) to the output window, and it has. The word |print| is a special command in Python. It instructs the interperter to output what you tell it to. In this tutorial, we capture that output in the window below the code so that you can easily see it. We will get very comfortable with this as the tutorial goes on. Meanwhile, let's talk about the tutorial itself: - The Table of Contents is above, marked *TOC*. - *Page-Up* and *Page-Down* keys can be used to navigate. - Code can be run with the *Run* button or *Shift-Enter*. - Check out the options in the *Run* menu (the arrow). Among other things, you can see what you have changed from the original slide. The tutorial will try to remember those changes for a long time. Exercises - Try removing each of the quotation marks in turn. What happens? - Change the string to say hello specifically to you. - Print 'Hello, Python!' using two strings instead of one, like this: |print 'Hello', 'Python!'|. What did |print| do for you automatically? """ print 'Hello, Python!'

"""Comments Python ignores **comments**: bits of text that start with |#| and extend to the end of the line. They are strictly for humans to read, and do not affect how your program runs. # This is a comment. Comments are useful for making notes to yourself or others about how the program's code works, but should not be used as the primary form of documentation in Python (more on that later). Note that the leader |#| has to stand alone syntactically to form a comment; if it appears inside of a string, for example, it is just part of the text. Exercises - Print a string with a comment leader inside of it, e.g., |print "# a comment?"|. """ # This is a comment. Comments start with '#' and # extend to the end of the current line. print "hey" print # This just prints a blank line. print "hello"

"""Variables A **variable** is a place to remember something. You assign a value to a variable using the **assignment operator** (a single |=|) like this: a = "hi" Now the variable |a| contains the string |"hi"|. In Python, variables spring into existence when they are _assigned a value_. They do not exist before being assigned, so accessing one without first assigning it is an error. A valid **variable name** can contain letters, numbers, and the |_| character, but cannot begin with a number. A variable can contain any kind of value. Exercises - Try assiging to a variable name that starts with a number (like 1eet). See what happens. - Assign a new variable to an existing one, e.g., |b = a|. Print it. """ a = "hi there" # 'a' now contains a string of text. print a a = 10 # 'a' now contains an integer number. print a my_longer_varname = 14 print my_longer_varname # Until assigned, variables cannot be accessed. print i_dont_exist # Not yet assigned!

"""Strings Strings basically contain text. They are delimited, as was seen in the "Hello" example, with quote marks. There are four fundamental quote styles: "Double-quotes" 'Single-quotes' ""\"Three double quotes make multi-line strings.""\" '''Three single quotes work the same way.''' Exercises - Run the program and note the indentation problem. Fix it. """ __doc__ = """Multi-line Strings Such strings are often used to write documentation for modules, functions, and classes. """ print 'Single-quoted string.' # single quotes work print "Double-quoted string." # double quotes, too print '''Multi-line strings may not do what you think, particularly with indentation.'''

"""String Formatting A quick detour is in order, here, since we want to do interesting things with strings besides printing out constants. A string can be *formatted* using the |%| operator thus: "I have %d oranges, but only %d apples" % (5, 3) What is up with |"%d"| and the |%| operator on a string? When applied to integers, |%| computes the _modulus_ (division remainder), but when operating on a string, it does _substitution_ (for those familiar with C, this is _printf-style_). Some examples are in the code window. There are actually many other format specifiers (the |%| inside the strings), too many to go into in this tutorial, but the most common ones are - %d: formats an integer (digits) - %s: formats anything into a string - %r: formats anything using |repr()| Full documentation is available here: http://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting The |%|-formatting is old and on its way out, but still plenty popular and ubiquitous, so understanding it is worthwhile. |str.format| is pretty neat, so take a look at the documentation for it when you feel like some heavy reading: http://docs.python.org/2/library/string.html#formatstrings We'll use a bit of both, but |str.format| has many advantages (including increased clarity), so we'll gravitate to it over time. """ import math # If you only have one format specifier, and the right # side is also a string, you can omit the tuple syntax. print "Hi there, %s!" % "you" # Integers get to use %d (for "decimal" - %x would be # "hex"). print "Base 10: %d, Base 16: %x, Base 16: %X" % (30, 30, 30) # Floating point is %f or %g, and I can never remember # which one I want, so I go with %f most of the time # unless it frustrates me enough to dig through the # docs. print "A floating point number: %f" % (math.pi) # You can also specify width and such with numeric # types. print "A width-constrained number: %.2f" % (math.pi) # And, you can get the repr of anything by using %r. 
print "The repr of a few things: %r %r %r %r" % ('hi', 26j, 17.4, len) # There is also a whole new way of formatting strings # that is really nice and super cool and has loads of # flexibility and documentation: str.format. # Definitely look up the docs on this. It has a lot of # nice features, and it's the Way Of The Future (TM). print "This is the {0}th time of {1}.".format(17, 30)

"""Module Docstrings If the first statement in a file is a string, Python uses it as documentation. This is called a **docstring**. Since documentation usually takes up more than one line of text, these use the triple-quoting format discussed earlier. These strings are available to your program, but more importantly, they can be used to produce human-readable documentation for everything you do. We'll make use of them throughout the rest of the tutorial. Exercises - Try printing the special variable |__doc__|. """ __doc__ = """This is a module docstring. A module is basically a file. All of the code in this editor makes up a single module, a module that you define by typing Python statements. """ print "This is a module - where's the documentation?"

"""String Escaping There is no difference between |'| and |"| - they both form equivalent strings. People usually pick one based on preference, changing only to include quotes inside, like this: "Don't touch my quoting." 'I need to "work", now.' Occasionally, you need to include both kinds of quotes inside of a string. In these cases, you can **escape** quotes using a backslash: "This string contains the \\" delimiter." Strings accept other escape sequences, like |'\n'|, which inserts a line feed character, making a new line. More info can be found here: http://docs.python.org/2/reference/lexical_analysis.html#string-literals Exercises - Try creating a string that contains a backslash: it will need to be escaped. """ __doc__ = """A demonstration of escape sequences. This multi-line string is delimited with triple ""\", and tells you that by escaping at least one of them (otherwise the string would end early). """ print "This has a double quote \" inside." print 'This has a single quote \' inside.' print "This has a second line:\n And this is it." print print __doc__ # Where is the backslash?

"""Calling Functions Python has a lot of stuff built in that you can just use. Much of it is exposed through **functions**. A function is _called_ by placing |()| after its name. If it accepts **arguments**, then they go inside of the |()|. The |len| function demonstrated here accepts a single argument: the thing you want to know the length of. x = len("hello") # x gets the value 5 All Python functions **return a value**. In the case of |len|, this means that calling it produces a new value as above. You can assign that value to a variable, or print it, or pass it into another function. Or, you can ignore it and it will go away. To understand how function calls work, it helps to think of calling a function as *replacing it with the return value*. In the example above, that means that the entire call, from the name |len| to the closing paren, is replaced with the length of "hello", which is 5. When you see a function call anywhere and want to understand what it means, you can imagine working from the inside out, left to right, replacing calls with the values they return. x = len([1, 2, len('hi')]) # innermost is len('hi') - replace it: x = len([1, 2, 2]) # next is len([1, 2, 2]) - replace it: x = 3 # No more calls - we're done. If you ever see a statement or expression that has function calls in it, you can understand what is going on by following the above procedure in your mind: replace the innermost, calls with values (they can be pretend values - we're imagining for the sake of understanding, here). Then work to the right, then work outward and do it again until there are no calls left. Functions are very important in all of computer science, so taking the time to understand what is happening right now is very useful for what's coming up. Exercises - One important function in Python is |repr|, which prints a "representation" of an object. Try printing |repr("10")|. See how it differs from |repr(10)|. 
- Convert the string |"2000"| into an integer by calling |int|. """ __doc__ = """Calling Functions Note: If you don't use a return value, it gets lost. """ # Call 'len', ignore (and lose) its value. len("hi") # Assign 'length' to the return value of 'len'. length = len("how long is this anyway?") print "length =", length # We can print it directly, too. print "The length is", len("hi there") # The repr function can be useful to see what's # really in a string. It adds quotes for you. print "Just print:", "Hi there" print "repr print:", repr("Hi there")

"""Numbers There are several numeric types built into Python, including integers (types |int| and |long|), floating point numbers (type |float|), and complex numbers (type |complex|). 10 # This is an 'int' 10.5 # This is a 'float' 6 + 3.2j # This is a 'complex' The interactive Python interpreter makes a nice calculator, and unlike this tutorial, you don't even have to type |print| there - the |repr| of every operation is output automatically. Basic math is easy - you can do addition, subtraction, multiplication, division, and exponentiation, among other things. Parentheses do what you would expect. Exercises - Print the number of atoms in the sun, as a large integer: |119 * 10 ** 55|. Bonus Work - Try opening an interactive Python prompt (in a terminal, not here) and using it as a calculator. """ # Basic numeric types. print "I'm an int:", 10 print "I'm a float:", 2.79 print "I'm complex:", 3.14 + 1j # Math is easy. a = 1000.0 # Some basic math operators: print print "Basic Math Operators:" print "Div:", a / 10.0 # Divide by 10 print "Mul:", a * 10 # Multiply by 10 print "Add:", a + 12 # Add 12 print "Sub:", a - 15 # Subtract 15 print "Exp:", a ** 5 # Take a to the 5th power. # Grouping: print print "Parentheses:" print "Multiplication before addition:", 3 + 2 * 5 print "Force addition to come first:", (3 + 2) * 5

"""Math, Modules, and Namespaces We know how to do basic things like addition and multiplication, but how do we get at more interesting things like sines and cosines? Python comes with "batteries included", which means you can get a lot of functionality with just the basic installation. But that functionality is not all available unless you ask for it by **importing modules**. Here we import |math| and start to use some of the things inside of it. Note how we use the |.| operator to access things _inside_ of the |math| module. This works for any kind of **namespace** in Python (something that contains other named things); a module is just one of several kinds of namespaces. When understanding functions in a namespace that are called, you can think of the namespace.function as a single name, e.g., |math.sqrt| is the function name in the code window, and |2| is the argument to that function. Thus, like we discussed earlier about function calls, you can replace the entirety of |math.sqrt(2)| with its value - that is what happens when a function is run. Exercises - The |dir| function gives you a _directory_ of a namespace. Print |dir(math)| and see what you can find in there. - Compute |math.sin(math.pi)|. Did it give you the answer you expected? How close was it? (Hint: |1e-3| is |0.001|). """ __doc__ = """Importing Modules, Doing Math""" import math # My favorite constants. print math.pi print math.e # Another important one (a square root). print math.sqrt(2)

"""More on Importing Some of the functionality you want in Python may only be available via **packages**, which are containers for modules. Or, you may just not want to type the |.| all the time. For example, you may want to access |math.pi| a lot, but that is a lot of typing for a short and common symbol. When importing, you can choose which pieces you want imported using the |from ... import ...| syntax: from math import pi, e This imports the symbols |pi| and |e| from the |math| module into the current **global namespace** so you can just use them without extra typing. You can also use |*| in place of a name, which imports everyting the module knows about. *You should rarely, if ever do this*, but when you need it, it's there for you. """ __doc__ = """From ... import ... To import just one thing from a module or package, see below. """ from math import pi, e print "I know the digits of 'pi' just fine:", pi print print "Another beautiful, naturally occurring number:", e

"""Getting Help One function that is built into the interactive Python interpreter is |help|. This is a very useful function, because it gives you basic documentation on just about anything you want. For this in-browser tutorial, the *help* function is included, just to give you a feel for what it does. You can also run "pydoc" from the commandline, or access http://python.org/doc/ directly or from the link above. Exercises - Go to the *Docs* link, find Python 2.x, Library Reference, and click on |Built-in Functions| (direct link here: https://docs.python.org/2/library/functions.html). - Print |help(int)|. Did it output what you expected? - Import |math| and run |help| on the module. Compare with |dir|. """ __doc__ = """Getting Help Help is available by calling the 'help' function. It can sometimes be more useful than 'dir'. """ print "Help for 'len'" help(len) print "Help for symbols" help('symbols') print "Help for keywords" help('keywords')

"""Basic Conversions We have talked about strings and numbers, and alluded a bit to the fact that we can convert between them. You can convert between things like numbers and strings using the appropriate function calls, like |int("200")| or |str(1.1 ** 24)|. There are a number of these **callables** (things you can _call_, like functions, using |()|) that convert between different types. A few are listed here (there are many more): int float complex str list tuple Exercises - Print the result of |5 * 30|. - Now try it as |str(5) * 30|. What happened? - What about |"5" * "30"|? - You can provide a *numeric base* to |int|. Try printing |int("FACE", 16)|. This treats |FACE| as a hexadecimal value. """ # I have a string, but I want a number! num_str = " 178000 " # Yup, it's a string: print repr(num_str) # Can it be an int? print int(num_str) # spaces are stripped first. # How about a float? print float(num_str) # Of course, converting between numbers works: print float(10) # But what happens with this? print int(10.5) # We can even make complex values from strings: print complex("-2+3.2j") # This won't work: print int("234notanumber")

"""Equality Things are **equal** to each other if they have the _same values_. In Python, testing for equality is done using the |==| operator, and inequality is tested with |!=|. As you might expect, |10 != "10"| (one is an integer, the other is a string), but |10 == 5 + 5| (both sides are integers with the same value). With variables, things get a little bit more interesting. Suppose you have two index cards, each with the number |5| on them. Each card is a _variable_, and the "5" written on them is their _value_. Because they have the same values, they are **equal** in the |==| sense: they contain the same data. But they are not the same card. Now suppose I write "5" on one card, show it to you, and say "This is |a|". Then I show you _the same card again_, but say, "This is |b|". In this case, |a| and |b| are equal in the |is| sense: they are not only equal (|a == b|), they are also referring to the same card (|a is b|). This normally does not matter much, but you will use it when testing for Python's special "nothing" value called |None|. Exercises - There are other comparison operators, and they do what you'd expect, even on strings and other sequences. Experiment with |<|, |<=|, |>|, and |>=| - see what happens when you print something like |5 < 7| or |'hello' >= 'hello there'|. """ print "Strings are not equal to integers." print "10" != 10 # True print 10 == 5 + 5 # True print "Variable assignment satisfies 'is'" a = 1543 b = a print a == b # Obviously true - same data. print a != a+1 # Indeed. print a is b # Also true. Assignment satisfies 'is'. # Performing an operation on data like integers or # strings produces a *new thing*, even if the data is # the same. print "Same data, not same thingy." b = a + 0 print a == b # Still true. print a is b # No longer true! print "not None:", a is not None # A very common kind of test. c = None print c is not None # False

"""Tuples You have already seen one kind of sequence: the string. Strings are a sequence of one-character strings - they're strings all the way down. They are also **immutable**: once you have defined one, it can never change. Another immutable seqeunce type in Python is the **tuple**. You define a tuple by separating values by commas, thus: 10, 20, 30 # This is a 3-element tuple. They are usually set apart with parentheses, e.g., |(10, 20, 30)|, though these are not always required (the empty tuple |()|, however, does require parentheses). It's usually best to just use them. Tuples, as is true of every other Python sequence, support **indexing**, accessing a single element with the |[]| notation: print my_tuple[10] # Get element 10. Exercises - Create a one-element tuple and print it out, e.g., |a = 4,| (the trailing comma is required). - Try comparing two tuples to each other using standard comparison operators, like |<| or |>=|. How does the comparison work? """ # A basic tuple. a = 1, 3, 'hey', 2 print a # Usually you see them with parentheses: b = (1, 3, 'hey', 2) print b print "b has", len(b), "elements" # Indexing is easy: print "first element", b[0] print "third element", b[2] # Even from the right side (the 'back'): print "last element", b[-1] print "penultimate", b[-2] # Parentheses are always required for the empty # tuple: print "empty", () # And single-element tuples have to have a comma: print "singleton", (5,) # A tuple print "not a tuple", (5) # A number # They are immutable, though: you can't change # them. b[1] = 'new value' # oops

"""Lists Like tuples, **lists** are sequences of any kind of value, but unlike tuples, they are **mutable**: they can change contents and size after being created. To create a list, use |[]|: [1, 2, 3, 4] # A 4-element list. [] # An empty list. They are indexed in exactly the same way as any other sequence in Python, via the |[]| notation, but because they are mutable, you can *change their size* and *assign values to their elements*: a = [1, 3, 5, 7] a[1] = 'hello' # This works. Lists have lots of **methods** (functions in their namespace that you can use to manipulate them), like |append|: a.append(9) # Add 9 to the end of a. Exercises - See the code for examples of how to use lists. Play with it a bit. - Use the |str.join| function to join a list of strings together. For example, what does |'\\n'.join(["hi", "there"])| do? Try different **delimiter strings** (in place of |'\\n'|). """ # Create a list using [] notation. a = [7, 3, 1, 9] print a print "a has", len(a), "elements" # Indexing works as expected. print "third element", a[2] print "last element", a[-1] # List are mutable: a[3] = "hello" # Change element 3. print a # And you can add to them. There are lots more of these # operations - see help(list). a.append("new value") print a # Sorting is one of those really useful list things: a.sort() print a # Extending is another: a.extend(['more', 'values']) print a

"""Slicing You can get or set the individual elements of a sequence by using |[]| to index into it. But this is just a special case of **slicing**. Slicing allows you to specify a _range_ of elements in a sequence, even for assignment where the underlying sequence is mutable. The most basic slice is |[start:end]| where |start| is *inclusive*, and |end| is *exclusive*: |[2:6]| takes everything starting at element |2|, up to *but not including* element |6|. There is an extended syntax with two colons, as well: |[start:end:step]| means you want to take everything in [start,end), but you only want every step-th element. As a quick note, the |range| function can be used to quickly produce a list of numbers, and its arguments are similar to those of slices. Exercises - Try the |range| function with 1, 2, or 3 arguments. See what it does. - Try reversing a list using slice notation (Hint: copy the list with a negative step count). - Try taking every third element of the reversed list. """ # The range(10) function produces all numbers in [0,10) # (like slices, the right endpoint is excluded). numbers = range(10) print numbers # A simple slice. print "3:8", numbers[3:8] # A slice containing one element. print "2:3", numbers[2:3] # just one element # But it's really useful because you can assign to # it. numbers[2:3] = [11, 12, 13, 14] print numbers # Even an empty slice is useful for assignment: a = [1, 2, 5, 6] a[2:2] = [3, 4] print "Assigned to empty slice and got", a # If you omit one of the slice numbers, it defaults to # the corresponding endpoint. Negative values work, # too. print "from the beginning to 4", numbers[:4] print "all but the last two", numbers[:-2] print "from 3 to the end", numbers[3:] print "everything - a complete copy", numbers[:] print "every other element", numbers[::2]

"""Dictionaries Sequences are very useful, but they just hold collections of stuff. Also, they're typically all about order (|set| being an obvious exception). **Dictionaries** (type |dict|), on the other hand, give a name to every piece of data within them. That name can be a string, or a number, or even a tuple (with the "hashable" caveat, but that's a different discussion). The name is called a "key". Dictionaries are typically created with with |{}| notation, with each element being a |key: value| pair. You can also create a dictionary by calling |dict|. To access an element of a dictionary, use the |[]| indexing notation, but instead of a number, give it a key. Note that slices are meaningless for dictionaries, and therefore are not supported. Unlike |list| and |tuple|, when iterating over or otherwise outputting a dictionary, order is _undefined_ and _unreliable_. Don't count on order. Exercises - There is a lot of content in the code - read through it and see if you can guess what it will output before running it. """ number_of_children = {"John": 6, "Mary": 2} # Empty is also allowed. print "After initialization:", number_of_children print "John has", number_of_children["John"], "children" # You can also create new items with index assignment: number_of_children["George"] = 12 print "Added George:", number_of_children print # There are many useful methods in dictionaries. print number_of_children.keys() # list of keys print number_of_children.values() # list of values print number_of_children.items() # list of key,value pairs print # The 'in' operator always applies to the keys, # never the values. print "George" in number_of_children # True print "Simon" in number_of_children # False print # Using the dict type to create a dictionary: d1 = dict() d1["key1"] = "value1" print d1 # You can also create a dictionary from a sequence # of key/value pairs using the dict callable type: d2 = dict([("K1", "v1"), ("K2", "V2")]) print d2

"""Functions We know how to call **functions** like |len| and |bool| to get information about stuff, so now we're going to learn how to write our own. Functions are _defined_ using the |def| statement. They have a name, a list of argument names in parentheses, a colon, and are always followed by an indented code block. To **return** a value from a function, you use the |return| statement. It can return any kind of value, including tuples, which are commonly used to package up and return multiple values. It is important to note that all functions return *exactly one value*. If you return multiple things separated by commas, you are really returning a single tuple of values. If you don't return anything, you are implicitly returning the value |None|. So remember: *functions always return exactly one value*. Also note that when |return| executes, the function *terminates immediately*. Remember how we talked about understanding a function call by replacing it with the thing it returns?See if you can predict what |times3(times3(2))| becomes by doing the mental replacement exercise we outlined earlier. The neat thing is that this time, you can *see* what |times3| returns because the definition of it is right there in the code window. There is no need to pretend. Exercises - Put a |print| statement into the |swapped| function. Call it without assigning its result to anything. - Change the |ordered| function to use |else| instead of relying on the early exit behavior of |return|. - Figure out why |swapped(swapped(1, 2))| does not work - do this by mentally performing replacement steps. HINT: every function always returns a single value, every time, no exceptions. When returning multiple values, the function is really returning a single tuple containing those values. """ # This is a basic function that accepts one # argument and returns that argument times 3. # As a *side effect*, it also prints what it is # doing. 
def times3(x): print "Hey - I'm multiplying {} by 3".format(x) return x * 3 # Now that times3 is defined, we can call it as # much as we like: print times3(12) print times3(6) # A function that returns its two arguments # swapped. Note that it returns two values by # returning a tuple (parentheses optional). # def swapped(a, b): return b, a # This one returns the arguments in order. # Note how it uses the fact that "return" exits # immediately to get its logic right. # def ordered(a, b): if a > b: return b, a return a, b print "swapping", swapped(10, 20) print "swapping", swapped('hello', 'aardvark') print "ordering", ordered('more', 'less') print "ordering", ordered((1,3,5), (1,2)) # When passing tuples *out* of a function, you can # "unpack" them into new variables in one step. x, y = swapped(1, 2) print "unpacked", x, y # Wait, why doesn't this work? print "ordered, swapped", ordered(swapped(1, 2))

"""Argument Unpacking We saw when creating our own function that chaining simple tuple-returning functions didn't work as expected. Taking |ordered(swapped(...))| just doesn't work, because |swapped| returns _one tuple_, and |ordered| expects _two arguments_. To make the call, you have to first unpack the result then send its values separately. You can do this with an **unpacking assignment**, like this: x, y = swapped(3, 6) print ordered(x, y) Fortunately, there is another less cumbersome way to do it that is more convenient. If you prefix the argument with |*|, Python will unpack the value into function arguments in one step: print ordered(*swapped(3, 6)) Exercises - Try calling |ordered(*(3, 2, 1))|. What happens? Why? """ def swapped(a, b): return b, a def ordered(a, b): if a > b: return b, a return a, b # You can always do this via unpacking assignment: x, y = swapped("hi", "there") print ordered(x, y) # But this is easier. print ordered(*swapped(1, 5)) print swapped(*ordered(4, 2))

"""Argument Packing You can _unpack_ sequences into arguments when you call functions, e.g., |ordered(*(3, 1))|, but you can _also_ define functions to accept **packed arguments** in a tuple. Take a look at |star_ordered| and |star_mixed|, for example. Here we use the |*| notation to indicate that we want to receive all of the unnamed arguments as a tuple. When you accept |*args| (or |*whatever|), you can place it at the end of a regular argument list, as shown in the accompanying |mixed_args|. It cannot be followed by regular arguments. Exercises - Try calling |star_ordered| with more than 2 arguments. What happens? - The builtin |min| function returns either the smallest of its arguments or the smallest item in a sequence, depending on how it is called. Implement your own |myMax| function that works similarly, but returns the largest item. - Play around with |sorted|. See what happens when you pass it a string, or a tuple, or a list. """ # Here, '*args' means "take all arguments and # stick them into the 'args' tuple in order". # # Also, 'sorted' is a handy function - it takes # any sequence and returns a sorted list. # def star_ordered(*args): return sorted(args) print "ordered:", star_ordered(6, 3) # You can mix regular and star parameters, if the # star ones come last. # def star_mixed(a, b, *others): print a, b, others # Note how the arguments are printed. star_mixed("hi", "there,", "what's", "your", "name?")

"""Named and Default Arguments Functions (or any callable, really) can be defined to allow some or all of their arguments to have **default values**. We have already seen this with the |dict| call, where you can call it without parameters to create a new empty dictionary, or you can call it with a list of |(key, value)| pairs to create a dictionary that is ready to go with that data. To define defaults for function arguments, you assign them where they are declared, thus: def myfunc(greeting, name='Compadre'): print greeting, name + '!' In this example, the parameter called |name| has a default value that will be used if the caller does not specify it. With an understanding of defaults, it now makes sense to mention **named arguments**. When calling a function, you can specify some or all of the parameters by name, using |name=value| syntax. When arguments are named, they no longer need to appear in order. Exercise - Take a careful look at the code examples. Fiddle with them until they make sense. - Named arguments must come last. Try uncommenting the final |print_many_args| call and see what happens. """ # If no name is specified when this is called, the # default value is used. def greet(greeting, name='Partner'): print greeting + ',', name + '!' # Use the default name. greet("Howdy") # Use our supplied name. greet("Hello", "Honey") # Call using named arguments. Note that, when # naming arguments, order is unimportant. greet(name='crazy', greeting='Wow') # Let's accept even more arguments. def print_many_args(a, b, c, d="D", e="E", f="F"): print a, b, c, d, e, f # Regular call without defaults: print_many_args("1", "2", "3", e="new_E") # It's always a good idea to specify default # arguments by name, every time. Don't do this # (even though it works just fine): print_many_args("1", "2", "3", "4", "5") # This won't work at all, because named arguments # must come last. Try uncommenting this line and # see what happens: # print_many_args("1", b="hello", "2")

"""Named Argument (Un)packing You can unpack sequences into function arguments by prefixing them with |*|, and you can accept arguments as tuples in your own functions by specifying a |*args| parameter. These work based on **argument position** - everything is sent and received in order. In much the same way, you can unpack a dictionary into **named arguments** using the |**| prefix, and your functions can accept _otherwise unspecified_ named arguments in a dictionary using the |**kargs| notation, as shown in the code window. Note that function parameters must be _defined_ in a particular order: positional first, then |*args|, then |**kargs|. Similarly, they must be _sent_ in a particular order: positional first, then named. Exercises - The |dict| callable creates a dictionary either from a sequence of |key, value| pairs or from its named arguments (the names become keys in the new dictionary). Create and print a dictionary using |dict| and named arguments. - Now try to specify both a sequence of pairs and named arguments. What happens? - Try calling |dict| with another dictionary and some named parameters. What happens? """ def takes_two(first, second): print "first:", first print "second:", second # You can unpack a dictionary into named # arguments with **: # takes_two(**{'first': 'the first thing', 'second': 'the second thing'}) # You can also define a function that accepts # unknown named arguments in a dictioary. Any name # that is not 'prefix', 'name', or 'suffix', will # end up in kargs. # def accepts_keys(prefix, name, suffix='', **kargs): print "The Famous", prefix, name + ',', suffix print "Extra Info:", kargs accepts_keys("Dr.", "Batman", "PhD.", sidekick="Postdoc Robin") accepts_keys(name="Mata Hari", role="Spy", prefix='Ms', interrogator="Sir Basil Thompson") # You can also accept both types of arguments: # def accepts_everything(a, b, *args, **kargs): print a, b, args, kargs accepts_everything(1, 2, 3, 4, 5, x='time', y='money')

"""Docstrings Now that we have defined our own functions, it makes sense to talk about how to document them properly. Earlier, it was briefly mentioned that comments are not the favored tool for creating documentation in Python: **docstrings** are. A string becomes a docstring when it is the first statement in a module, class, or function, simply by virtue of its position. It does not need to be assigned to anything. The |pydoc| utility and |help| function each format these docstrings and display them when requested. At the command line, for example, you can type pydoc list And get a nice help page made up mostly of module docstrings. In this interactive tutorial, you can instead call |help()| at the bottom of the code to see something similar in the output window. Exercises: - Try running |help()|. - Try running |help(a_complex_function)|. """ __doc__ = """Short description of the module. A longer description of the module. This docstring can be accessed in the module-global __doc__ variable. """ def a_complex_function(a, b, c): """Do a complex operation on a, b, and c. This will do amazing things with a, b, and c. Just watch. Args: a: A boolean value (see above). b: A boolean value (again, see above). c: A sequence. Returns: Nothing - awesomeness needs no return value. """ print "Shhh: it's actually not all that complex:" print a, b, c # No return statement, or an empty return statement, # will implicitly return None. print "Result of a complex function:" print a_complex_function(True, False, [1,2,3,4]) print "The module docstring:" print __doc__ print "The function docstring:" print a_complex_function.__doc__

"""Docstrings as Tests With an understanding of docstrings, we can now take advantage of a very cool facility in Python called **doctests**. Unit tests can be a real pain to write, because you have to force yourself to switch gears when in the code-writing zone. Doctests help to make it easier to write simple tests _while you write your documentation_. The idea is simply this: you write, inside of the docstring, a short "interpreter session": you write down something that you could type in the interactive interpreter, followed by the results you would see after it executes. You can then easily test whether that actually happens or not. Doctests are *usually* run by importing |doctest| and running |doctest.testmod()|. In our web version, however, we just run |_testmod|, a special facility for this environment. We'll use doctests for the rest of the tutorials to help with the exercises and to show how things work. Exercises - Make |less_than_five| pass by making its implementation match its documentation. - Make the module doctest fail. You can do anything (like saying that |True| produces |False|). """ __doc__ = """A testable module. What follows is a doctest. We basically mimic the Python interactive interpreter prompts >>> and ..., and show expected output below them. >>> less_than_five(3) True """ def less_than_five(a): """Return True if a < 5. >>> less_than_five(10) False >>> less_than_five(5) False >>> less_than_five(2) True """ return a <= 5 # Actually run the doctests: print "Running tests - no news is good news:" _testmod()

"""Exercise: Functions and If (1) Now we have enough tools to do something more interesting! Let's remind ourselves of how |if| and **slicing** work. For this and later exercises, you will fill in the code marked |# TODO:| to make the doctests pass. Remember that you can use |[::-1]| to get a reversed sequence using a slice. First try running the code without changes. What fails? Exercises - Write the body for the function |reverse_a| by replacing the |TODO| comment with real code. If the string |s| starts with the letter |"a"|, return it reversed. Otherwise return it unchanged. You may want to use |s.startswith('a')| instead of |s[0] == 'a'| so that the function will also work on empty strings. """ __doc__ = """Functions and branching exercise (1) Make these tests pass: >>> reverse_a("a silly thing") 'gniht yllis a' >>> reverse_a("not so silly") 'not so silly' >>> reverse_a("") '' """ def reverse_a(s): """Return s reversed if it starts with a, not reversed otherwise.""" # TODO: Fill this in. if _testmod().failed == 0: print "Success!"

"""Exercise: Functions and If (2) For this exercise, you get to write the whole function out, including the name and arguments. Docstrings are optional, but will produce more bonus points. Exercises - Write a function |every_other_arg| that accepts any number of arguments and returns a list containg every other one. Recall that |[::2]| will produce every other element of a sequence, and |*args| will collect all function arguments into a single tuple. """ __doc__ = """More Practice with Functions and Branching >>> every_other_arg(0, 1, 2, 3, 4, 5, 6) (0, 2, 4, 6) >>> every_other_arg() () >>> every_other_arg("goodnight", 0, "my", 1, "someone") ('goodnight', 'my', 'someone') """ # TODO: write the function to pass the tests # above. if _testmod().failed == 0: print "Success!"

"""Exercise: Functions and If (3) This time we'll use the |sorted| builtin function, along with some slicing and |if| statement work, to make the tests pass. First, a couple of reminders are in order: - |sorted| accepts a sequence and returns a sorted list. - Lists can be joined together using |+|, like this: |[1, 2] + [3, 4]|. - Slices can use negative values to indicate "distance from the right side", like this: |(0, 1, 2, 3)[-2:]|, which produces the last two elements |(2, 3)| (it means "start at 2 from the right and take everything from there"). You may want to review slices quickly before diving in. - The length of a sequence is obtained with |len|. Exercise - Write the |kind_of_sorted| function and make the module docstring pass. It accepts one argument: a list, and returns that list with _all but the first two and last two elements sorted_. The first two and last two elements should remain in the same place. Hint: what should happen when the list is small or empty? How small? """ __doc__ = """Kind of Sorted >>> kind_of_sorted([8,7,6,5,4,3,2,1,0]) [8, 7, 2, 3, 4, 5, 6, 1, 0] >>> kind_of_sorted([5, 4, 3, 2, 1]) [5, 4, 3, 2, 1] Now check this out - creates a list of characters from a string, gets it kind of sorted, and joins the result back into a string: >>> ''.join(kind_of_sorted(list("aragonite"))) 'araginote' >>> kind_of_sorted([]) [] """ def kind_of_sorted(seq): """Sort all but the first two and last two elements.""" # TODO: Fill this in. if _testmod().failed == 0: print "Success!"

"""Main Functions Python just takes your code and starts doing what it says, from top to bottom. When you run the code in the window here, Python just reads it top to bottom and executes it. When it encounters things like |def|, it knows to save them for later so you can call them. But if it encounters code that it can execute right away, it just does it. It does this not only when running your program, but also _when importing modules_. Folks running |import foo| don't typically expect a lot of work to be done when they do that - they're providing the work, the module should just provide the tools. So, modules should not typically do anything other than provide variables and |def| and |class| statements for other code to use. But, it can still be useful to "run" module code by itself, like with |doctest|. We can, it turns out, have it both ways. A very common idiom is to check the module's |__name__| to determine whether it is being imported or not, and to act accordingly. That idiom is shown here. A bit of free advice: *always do this* in real code. Exercises - Print |__name__|. - Now |import math| and print |math.__name__|. """ __doc__ = """Main Functions Demo When writing your code, it's a good idea to have as little in the module's global namespace as possible. This is typically accomplished by testing the module's __name__ and providing a main function where all of the work is really done. See below: we test __name__ == '__main__'. If it does, we are not being imported, so we execute the main function. Otherwise we do nothing (and just provide stuff for other people to use). """ def main(): print "Here is where we do the *real* work." if __name__ == '__main__': main()

"""Recursion With an understanding of how to write and call functions, we can now combine the two concepts in a really nifty way called **recursion**. For seasoned programmers, this concept will not be at all new - please feel free to move on. Everyone else: strap in. Python functions, like those in many programming languages, are _recurrent_: they can "call themselves". A |def| is really a sort of template: it tells you *how something is to be done*. When you call it, you are making it do something *specific*, because you are providing all of the needed data as arguments. From inside of the function, you can call that same template with something specific *and different* - this is recursion. For example, look at the |factorial| function in the code window. It starts with a **base case**, which is usually a really easy version of the problem, where you know the answer right away. For non-easy versions of the problem, it then defines a **recursion**, where it calls itself with a smaller version of the problem and uses that to compute the answwer. Exercises - Uncomment the |print| statements inside of |factorial| (above and below |smaller_problem|) to see what is happening. """ __doc__ = """Introduction to Recursion The "factorial" of something is formed by multiplying all of the integers from 1 to the given number, like this: factorial(5) == 5 * 4 * 3 * 2 * 1 You can do this recursively by noting that, e.g., factorial(5) == 5 * factorial(4) This can't go forever, because we know that factorial(1) == 1 See below. """ def factorial(n): if n <= 1: return 1 # print "before recursion", n smaller_problem = factorial(n - 1) # print "after recursion", n return n * smaller_problem # This gets big fast print "2! =", factorial(2) print "7! =", factorial(7) print "20! =", factorial(20)

"""Exercise: Recursion (1) Recursion is a pretty powerful idea. You can do a lot with it. In fact, you can do so much with it that some languages (not Python) use it as their main way of getting things done. Now you get to practice the idea of recusion with a simple problem. Before starting, though, remember these things: - Start with a *very easy* version of the problem. When do you know the answer without having to think about it? Write that down first and test it. - Then consider a slightly bigger version of the problem. How can you make it a bit smaller and use that to get the answer? You'll get lots of help on this one, so don't worry. Exercises - Write the |add_all| function as described in the docstring and its tests. A good base case for this is an empty list, which would have a sum of |0|. This is outlined in the first |TODO|. - Now write the recursion. You can use a slice to peel off one value and add it to the sum of _the rest of the list_. This is outlined in the second |TODO|. """ __doc__ = """Sum a List With Recursion >>> add_all([]) 0 >>> add_all([1]) 1 >>> add_all([3, 4]) 7 >>> add_all(range(1, 11)) 55 """ def add_all(seq): """Add all elements of a list.""" # TODO: Write a base case: return 0 if the list # is empty. Recall that empty == False in if # statements. Or you can test for len(seq) == 0. # # TODO: Write the recursion. You can either take # an element from the front of the list (seq[0]) # and add it to add_all of the rest, or you can # take one from the back (seq[-1]) and add it to # the rest, like this: # return seq[-1] + add_all(seq[:-1]) if _testmod().failed == 0: print "Success!"

"""Exercise: Recursion (2) Now we'll do something a little bit more interesting. We'll implement the Fibonacci sequence. The Fibonacci sequence shows up in lots of interesting places: http://en.wikipedia.org/wiki/Fibonacci_number In a nutshell, every number in the sequence is found by adding the previous two numbers, making a sequence like this: 1 1 2 3 5 8 13 21 ... The base case for this is "elements 0 and 1 get value 1". After that it's just "sum the previous two to get the next one." Exercises - Implement a function that returns the nth Fibonacci number. A base case has been provided; you fill in the recursion. Hint: you need |fibonacci| values for |n-1| and |n-2| to get your answer. Bonus Work - Implement a function |binary_search(value, sequence)| that does binary search on an ordered sequence by calling itself on smaller and smaller slices. """ __doc__ = """Compute the Nth Fibonacci Number. >>> fibonacci(0) 1 >>> fibonacci(6) 13 >>> fibonacci(7) 21 """ def fibonacci(n): """Compute the nth Fibonacci number.""" if n <= 1: return 1 # TODO: Fibonacci sequence if _testmod().failed == 0: print "Success!"

""""While" Loops Recursion is powerful, but not always convenient or efficient for processing sequences. That's why Python has **loops**. A _loop_ is just what it sounds like: you do something, then you go round and do it again, like a track: you run around, then you run around again. Loops let you do repetitive things, like printing all of the elements of a list, or adding them all together, without using recursion. Python supports two kinds. We'll start with **while loops**. A |while| statement is like an |if| statement, in that it executes the indented block if its condition is |True| (nonzero). But, unlike |if|, it *keeps on doing it* until the condition becomes |False| or it hits a |break| statement. Forever. The code window shows a while loop that prints every element of a list. There's another one that adds all of the elements. It does this without recursion. Check it out. Exercises - Look at |print_all|. Why does it eventually stop? What is the value of |i| when it does? - Why does |slicing_print_all| stop? How does it work? """ __doc__ = """Use while loops to do things repetitively.""" def print_all(seq): """Print all elements of seq.""" i = 0 while i < len(seq): print "item", i, seq[i] i = i + 1 # This is also spelled 'i += 1' def slicing_print_all(seq): """Another way of using while - less efficient.""" while seq: print seq[0] seq = seq[1:] def add_all(seq): """Add all of the elements of seq.""" i = 0 s = 0 while i < len(seq): s += seq[i] i += 1 return s print "Using indices:" print_all([1, 5, 8, "hello", 9]) print "Using slices:" slicing_print_all(range(3)) print "Summing:" print "sum of all:", add_all(range(1,12)) # Should be 66

""""For" Loops A much more common loop in Python is the |for| loop, short for "for every". It is much more convenient than |while| for doing something to every element of a sequence: for variable in sequence: body_statements Every time through the loop, |variable| is assigned the next element in |sequence|, and |body_statements| are executed. When there are no elements left, the statement exits. Of note is the use of the |in| keyword. But in this case it is not used merely as a test for containment, it is used as a way of saying "give me *everything* |in sequence|, one at a time in |variable|". Finally, we also revisit the concept of _unpacking assignment_. Note the loop that says |for i, x in ...|, which is a kind of assignment, one that happens every time the loop starts; unpacking works here too. """ __doc__ = """For Loops""" # For loops help you iterate over sequences: seq = [1, 3, 6, 10] print "sequence output" for x in seq: print x # Here's a way to add up all of the numbers in a # sequence: s = 0 for x in seq: s += x # Also spelled 's = s + x' print "sum", s # Note that you can also do unpacking assignment # in the loop itself: pairs = [(1, 'a'), (2, 'b'), (3, 'c')] for x, y in pairs: print "x:", x, "y:", y

"""Exercise: For Loops (1) For loops are pretty handy and compactly defined. They fit the way that people think when they want to "do something to everything in this list". They're also good for making one list from another one. We'll do that, here. Sometimes you have a list, and you realy just want to know which _index_ each value has. For example, you want to take a sentence and associate a location to each word. In this particular case, it is a convenient way of converting each pair into named variables without |[]|-indexing. Exercises - Write the |enumerator| function, which takes a sequence of items and returns a sequence of pairs, as described in the TODO. Make the test pass. """ __doc__ = """Enumerator Exercise >>> enumerator("stuff") [(0, 's'), (1, 't'), (2, 'u'), (3, 'f'), (4, 'f')] >>> enumerator(['a', 'b', 'c', 'd']) [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd')] """ def enumerator(seq): """[item, item, ...] -> [(0, item), (1, item), ...]""" # TODO: Implement this using a 'for' loop. Create a # new list and append elements to it. # HINT: The range function is useful for getting a # list of indices into a sequence, if you can take # its len. if __name__ == '__main__': if not _testmod().failed: print "Success!" # Note how, when we know we have a list of pairs, we # can just unpack them right in the loop statement. for i, x in enumerator("a sequence of characters"): print i, x

"""Exercise: For Loops (2) For loops are fundamental in many languages, but because of generators, which we'll discuss soon, you see them even more in Python than elsewhere, so we're going to pause and practice just a bit more. There are a couple of reminders that will probably help you for this exercise: - You can unpack _any_ tuple into variables like this: |a, b = "my", "tuple"| (recall that non-empty tuples can be defined without parentheses), which makes assigning multiple things at once pretty convenient. This can make the iterative |fibonacci| function really easy to follow, for example. - There are special assignment operators that allow the expression of things like |a = a + 1| to be written as |a += 1| instead. This pattern works for all binary operators, including the standard math operators like multiplication, division, addition, subtraction, and more. - You can get a sequence of integers by using the |range| function. This can be useful in loops, e.g., |for i in range(n):|, which assigns the numbers |0| through |n-1| to |i|, one at a time. Exercises - Implement |fibonacci| again, but this time with |for| loops. """ __doc__ = """Loop Exercises >>> fib = [] >>> for i in range(10): ... fib.append(fibonacci(i)) >>> fib [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] """ def fibonacci(n): """Returns nth element of the Fibonacci sequence. """ x0, x1 = 0, 1 # TODO: Fill me in. # Recall that the sequence begins with 1, 1, # and every element thereafter is the sum # of the preceding two elements. So, keep track of # the last two elements when you want the next # one (which becomes one of the last two for next # time). return x1 if __name__ == '__main__': if not _testmod().failed: print "Success!"

"""Files We're getting close to being able to write something really useful. To do that, we need to _receive_ data from the outside world, not just _produce_ it. In this environment, you can access a virtual filesystem that is part of the in-browser interpreter. Let's do something silly: let's get all of the lines of the Python |string| module and print out the ones that contain comments. To do this, we'll use the builtin |open| function. It takes a filename as an argument and returns a "file-like" object. In Python-speak, this means it supports some basic things like |read|, |write| (if writeable), and _iteration_. Because file objects are **iterable**, they can be used as the sequence in a |for| loop. When used like this, they look like a sequence of lines. Another tidbit in the code is the use of |lstrip| and |rstrip| on each line in the file: - |lstrip|: strip whitespace from the left - |rstrip|: strip whitespace from the right (including newlines) There is also |strip|, which strips it from both sides. """ __doc__ = """Files: Opening the Code""" f = open('lib/pypyjs/lib_pypy/string.py') for line in f: if line.lstrip().startswith('#'): # ignore leading space print line.rstrip() # strip trailing space, including \n. f.close()

"""More on Files: Listing Directories Before we move on, let's do one more thing with files. Here, we will list all files in a directory and stick them into a list before outputting them. To do this, we import the |listdir| function from the |os| module. Heed the comment above the import, though - in the name of education we are ignoring best practice. Incidentally, the |os| module has a lot of interesting stuff in it. It's worth poking around the documentation when you can. Finally, note that we are using the |str.join| method, here, to print out the listing. Here |join| is called on the _delimiter_, and the list of strings is an argument. They are then joined into a single string. Take some time to understand what's happening. Exercises - Change the loop to only output names that end in |.py| and contain the word 'exercise'. You may find the |and| operator useful for doing this (there is also an |or| operator). """ __doc__ = """A Note on Imports It's usually best to import modules, not import stuff *out* of modules. This makes it easier to tell where things come from after you've been away for a while. We'll ignore that just to show how 'from' and 'as' work. """ from os import listdir from os.path import join as pathjoin parent = 'lib/pypyjs/lib_pypy' all_files = [] for name in listdir(parent): if not name.endswith('.py'): # Remember: 'continue' means 'jump to the top # of the loop again'. continue all_files.append(pathjoin(parent, name)) print '\n'.join(all_files)

""""With" Statements, and Our Tracker Now that we've fiddled around a bit with files and getting web content, we know that we don't really need to do much more with those: they're just iterables over lines, or (if you call |read|), big long strings. We can work with line iterables or long strings without hitting the file system or even the web, so we'll mostly proceed with smaller in-code data. Coming Up In the upcoming series of exercises and instructive slides, we'll build all of the pieces of a weight tracker with charts for your ... cat. Or dog. Or whatever politically correct and unembarrassing thing that you aren't allergic to. The idea will be to (eventually) produce a nice chart to demonstrate to kitteh's vet that the diet is going well. One More Concept This is chance to take another deep breath before the plunge. Let's quickly talk about files and |with| before we do. When using files, it's usually a good idea to make sure that they're closed when we're done with them, even if something goes wrong. An example of a very common idiom for that is shown in the code, using |with|. Don't worry too much about how it works, just get used to seeing it, particularly when working with files. """ __doc__ = """With Statements The "with" statement sets up a *context*. A context is an opportunity to do something with a resource, then have it automatically cleaned up when you're done. Files are a great and common example of why you want one: opening the file provides a context - you work with the file, and when the context exits, it closes it for you, even if your code has a fatal error. Another example is synchronization primitives like mutexes, which you want to release after you're done with them. """ import os.path filename = os.path.join("lib", "pypyjs", "lib_pypy", "warnings.py") with open(filename) as f: print f.read()

"""Raising Exceptions We want to build a basic tracker that can plot data in a nice chart. The exercises are going to be getting a bit longer, now. The format that we expect is a file of lines containing universal (and sortable) date strings, and a floating point measurement after some space, as shown in the accompanying code documentation. We will write a function that takes a file-like object and produces a list of (date, measurement) string pairs. If the dates are out of order, it will **raise** a builtin |ValueError| **exception**. This part is new: exceptions are, kind of like |return|, a way of exiting a function early. But unlike |return|, they exit *all calling functions, too*, until the program terminates or the exception is explicitly handled. They are for "exceptional" cases, like errors when you can't really recover because the problem is elsewhere. We'll get more into them later. For now, the idea is to use the |raise| keyword, then pass a message using the |ValueError| exception, which is, of course, callable. Exercises - Fill in the parts marked |# TODO| in the parse_measurements function. The description of what to do is there. You can test it by running it (which executes the doctest at the top of the module). """ __doc__ = """Parse dates, ensure monotonicity. We parse this format of "date measurement" entries, ensuring that the dates are in strictly ascending order. 2012-11-10 9.6 2012-11-11 9.5 2012-11-12 9.4 2012-11-13 9.1 Blank lines and comment lines are also allowed. See the tests here. >>> parse_measurements([' 2012-10-10 5.4 \\n', ... ' # comment!\\n', ... '2012-10-11 5.3']) ['2012-10-10', '5.4'] ['2012-10-11', '5.3'] >>> parse_measurements(['2012-10-10 5.4', '2012-10-09 5.3']) ['2012-10-10', '5.4'] Traceback (most recent call last): ... ValueError: Non-increasing dates: 2012-10-10 -> 2012-10-09 """ def parse_measurements(lines): """Parse date-measurement entries from lines. 
See docs above.""" last_date = "" # less than all other strings for line in lines: # TODO: # - Strip each line (using line = line.strip()) # - Skip blanks (continue if not line) # - Skip comments (continue if line.startswith('#')) # - Use 'split' and unpack into date and measurement # - If the date is not greater than the # previously-read date, raise ValueError as shown # in the commented-out code here: # # raise ValueError( # "Non-increasing dates: %s -> %s" % (last_date, date)) # # - Don't forget to set last_date down at the # bottom, here! (last_date = date). print [date, measurement] if __name__ == '__main__': if _testmod().failed == 0: print "Success!"

"""Generators It's head-exploding time! We recently wrote a function that, given lines with dates and measurements, splits them up, makes sure they only move forward, and prints them out. Printing is nice, but not impressively useful or exciting. We want to _transform_ this data (eventually into a chart), not just output it. Instead of printing, the function can be made more generally useful by returning a list. In fact, that's what we've done here. Take a look and see what it's doing. Keep in mind that in order to do this, even though the file is read incrementally, and the consumer may only need things one at a time, the entire dataset must be in memory for this to work. Let's fix that using one of Python's more powerful and elegant constructs: the **iterator generator**. By placing a "yield" keyword in the function, the function is changed to not merely return a single value, but to return an _iterable_ that can produce _all yielded values_ one at a time, when asked. Recall that |for| loops work with iterables, as does the |list| builtin. Exercises - Replace the code as described in the TODO sections and see how it works (and notice that we changed the name of the function to reflect what it returns). - Write a |for| loop in the main code (replace the use of |_testmod| if you want) that outputs the result of |parsed_measurements(...)| with some lines of your own. """ __doc__ = """Some notes on 'parsed_measurements'. This passes right now. Your job is to convert the function to a generator and keep it passing. >>> list(parsed_measurements(['2012-10-10 5.4', ... '2012-10-11 5.3'])) [('2012-10-10', '5.4'), ('2012-10-11', '5.3')] """ def parsed_measurements(lines): # TODO: # Remove this values list. Just kill it. 
values = [] last_date = "" # less than all other strings for line in lines: line = line.strip() if not line or line.startswith('#'): continue date, measurement = line.split() if date <= last_date: raise ValueError("Non-increasing: %s -> %s" % (last_date, date)) # TODO: # Replace this line with # yield date, measurement # And remove the return statement completely. # Then step back, run it, and see if you can figure # out what is going on. values.append((date, measurement)) return values if __name__ == '__main__': if _testmod().failed == 0: print "Success!"

"""Generators, Explained Let's talk more about what you just did. When you write a function that has a |yield| keyword, that function is transformed into an **iterator generator**, meaning that when you call it, it creates and returns an iterator that you can use to get at the values that it yields. We will talk more about the concept of **iterators** a little later on, but you should know that |for| loops actually work with iterators, not just with sequences. An iterator is something that you can call |next()| on, and it will produce a new value until it doesn't have any more. The example in the code window illustrates some of these concepts. Returning a list does just what you would expect, so printing it shows you a nice list. Calling a generator, however, does not return you a list, but ... something else. That something is an iterator that you can get values out of whenever you need a new one. Here we call |next(...)| on it to get one value at a time, and we also use it in a |for| loop. """ # A perfectly normal function. # def get_a_list(): my_list = [] for x in range(10): my_list.append(x) return my_list # A similar function, but it's really a generator. # def get_an_iterator(): for x in range(10): yield x print "Getting a list:", get_a_list() my_iter = get_an_iterator() print "Got ... something:", my_iter print "Getting the next value:", next(my_iter) print "Looping over the rest of it:" for x in my_iter: print x

"""Generators for Refactoring Now that we know how to make our own generators, let's do some refactoring to make use of this idea and clean up the code a bit. We'll start by splitting out the |clean_lines| function, which basically just skips blank lines and comments, stripping unnecessary space. This notion of converting one iterator into another is prevalent in Python. As one rather common example, the |enumerate| builtin converts an iterable over items into an iterable over |(index,item)| pairs. You built something similar earlier. Generators make refactoring sequence operations really easy, even operations that need to remember something about past elements. Without them, separating functionality like this would be hard or sometimes even impossible. Exercises - Look carefully at "clean_lines" and make sure you understand how it works. - Use "enumerate" to get line numbers with the data, and emit that line number in the ValueError message. Note that in string formatting, {0} means "the first argument". You can put any number in there, so long as it matches the position of what you pass to |format|. So, you could use |{2}| for the line number if you want. """ __doc__ = """Refactoring functionality. Changes: we now clean out comments and blank lines in a different function, and the error message for bad dates has the line number in it. >>> list(parsed_measurements(['2012-10-10 5.4', '2012-10-11 5.3'])) [('2012-10-10', '5.4'), ('2012-10-11', '5.3')] >>> list(parsed_measurements(['2012-10-10 5.4', '2012-10-09 5.3'])) Traceback (most recent call last): ... ValueError: Non-increasing (2): 2012-10-10 -> 2012-10-09 """ def clean_lines(lines): for line in lines: line = line.strip() if not line or line.startswith('#'): continue yield line def parsed_measurements(lines): last_date = "" # TODO: # Use 'enumerate(clean_lines(lines))' to get # (number, line) pairs. Use the number in the # exception message to show on what line the # error occurred. 
for line in clean_lines(lines): date, measurement = line.split() if date <= last_date: raise ValueError("Non-increasing: {0} -> {1}".format( last_date, date)) last_date = date yield date, measurement if __name__ == '__main__': if not _testmod().failed: print "Success!"

"""Real Dates, and Strings to Numbers So far we have done everything with strings. Now it's time to start using more interesting and appropriate objects. We can't very well do math with strings, after all. And, we might want to manipulate our dates in more meaningful ways than strings will allow, like outputting alternate date formats. Take a look at the doctest for |parsed_measurements|. It shows how we should be able to emit European date formats once we're done. We'll convert strings to numbers using |float|, and strings to dates using the |datetime| module. Note that |strptime| means "parse this string into a |datetime|" and |strftime| means "format this |datetime| into a string". The ugly names are historical and therefore traditional and sacred. By the way, we have also started using **named substitutions** in |str.format|. Check it out. Exercises - Fill in the part marked |# TODO|, making |measurement| into a float, and |date| into a |datetime| object. Bonus Work - Convert the final object into a |date| instead of a |datetime|, since it doesn't have a time component anyway. You may want to look at the help for |datetime.datetime|. """ __doc__ = """Convert lines 'date measurement' into pairs. >>> lines = ['2012-10-10 5.3', '2012-10-11 5.4'] >>> for d, w in parsed_measurements(lines): ... print type(d), d, type(w), w <class 'datetime.datetime'> 2012-10-10 00:00:00 <type 'float'> 5.3 <class 'datetime.datetime'> 2012-10-11 00:00:00 <type 'float'> 5.4 >>> for d, w in parsed_measurements(lines): ... print d.strftime("%d/%m/%Y"), w 10/10/2012 5.3 11/10/2012 5.4 """ # Not a module, but seriously? Who wants to type # "datetime.datetime.stuff" all the time? # Sometimes breaking the rules makes sense. 
:-) from datetime import datetime def parsed_measurements(lines): last_date = "" for i, line in enumerate(clean_lines(lines)): datestr, measurement = line.split() if datestr <= last_date: raise ValueError( "Non-increasing ({line}): {prev} -> {next}".format( line=i+1, prev=last_date, next=datestr)) # TODO: convert measurement to a float, and # use datetime.strptime(datestr, '%Y-%m-%d') # to get a real date object called 'date'. # Yield those instead. last_date = datestr yield datestr, measurement def clean_lines(lines): for line in lines: line = line.strip() if not line or line.startswith('#'): continue yield line if __name__ == "__main__": if _testmod().failed == 0: print "Success!"

"""Iterables and Iterators It has been mentioned that |for| loops iterate over any **iterable**, not just any sequence type. We also had a taste of what iterators do when discussing generators. Let's expand on that, now. The concept of iterable is more general than that of a sequence. In Python terms, an iterable is anything that can, when asked for it via |iter|, produce an iterator. Iterables include such things as lists, tuples, strings, sets, dictionaries, files, and of course, generators. You can obviously use an iterable in a |for| loop, but that is not all. You can also ask one for an iterator that you can advance _by hand_. We haven't done that very much before, so let's do it now. An important thing to note about iterators is that, once partially consumed, they do not rewind. Looping over a partially-consumed iterator begins where it last left off. When an iterator is exhausted, advancing it causes the builtin |StopIteration| exception to be raised. |for| loops know how to handle this, exiting cleanly when it occurs. When advancing things by hand, you have to be aware of it. The sample code demonstrates how these work, including the |StopIteration| exception. Take time to understand the examples. """ import string # This is a string, and is therefore iterable # letters = string.ascii_lowercase print letters # So, we can get an iterator from it. # letter_iter = iter(letters) print letter_iter # And we can call next on it to get a value and advance # it. # print next(letter_iter) print next(letter_iter) # Iterators are iterables that return themselves when # asked for an iterator, so they can also be used in # "for" loops. Note how it starts where it left off. It # is already partially consumed. # for letter in letter_iter: print letter, print # Let's advance to the end. # item_iter = iter((1,2)) print next(item_iter) print next(item_iter) # StopIteration exception! # "For" loops know how to handle this and exit cleanly # when they see StopIteration. 
# print next(item_iter)

"""Exceptions We have mentioned that a |for| loop knows when to stop looping by intercepting the |StopIteration| exception. We can also do that by hand. In fact, we can write an equivalent |while| loop by first creating an iterator, then calling |next| within a |try|/|except| block that breaks the loop when it gets a |StopIteration| exception. Observe the |try|/|except| block in the code. Statements that might raise an exception are in the |try| block. You can then handle those exceptions in the |except| part, and there can be more than one of these, e.g., if you want to do different things for different exceptions. The |Type as value| syntax is how we get at the actual exception data, if we want it. Here we discard it (in which case, we could have left off |as e| altogether and just said |except StopIteration:|). Exercises - Try printing |e| and |repr(e)|. See what it looks like. - Try removing the |try|/|except| block in the |while| loop and just printing. What happens? - Inside of |call_ponies|, Wrap the call to |print_ponies| in a |try|/|except| block that catches the exception and prints it out instead of terminating immediately. """ for x in "A pony, for me?": print x, print # Equivalent to the above is this "while" loop. # range_iter = iter("No pony for you today.") while True: try: print next(range_iter), except StopIteration as e: break # end the loop early and cleanly print # Clean exit! # Now for a more general exception raising/catching # example. # def print_ponies(number): if number < 0: # This is not in a "try" block - so it causes # the function to terminate immediately. raise ValueError("You have a debt of {n} ponies.".format(n=number)) print "You have {n} ponies".format(n=number) def call_ponies(number): print_ponies(number) print "No pony errors!" call_ponies(10) call_ponies(-2)

"""Smoothing Generator Anyone who has weighed their kitten will know that weight fluctuates from day to day. What you really want to know is whether the overall trend is good, not whether there has been more or less (to put it delicately) water output that day. You want the trend smoothed out over time. Your task: fill in the part marked |TODO| to do this smoothing without any |if| statements inside of loops. To accomplish this, we will again use a generator. This one will accept an iterable of floating point values and produce smoothed floating point values in return. We're doing this sort of in a vacuum, not taking the nature of our full date-endowed data into account. We'll run into that again later. Meanwhile, there's a nifty new concept hiding out in the code's doctest: _list comprehensions_. The gist: you can embed |for| syntax directly into list construction. Try to understand the comprehension in the docstring after you finish the exercise. We'll talk more about it later. Exercises - Fill in the part marked |TODO| by following the instructions in the comments. Try _not_ to use |if| to test for the first run through the loop. """ __doc__ = """Smoothing using a generator. "exponentially_smoothed" applies exponential smoothing to values. The first smoothed value is just the value itself. After that, each smoothed value is calculated to be 10% of the distance to the new value. >>> values = [8.2, 8.1, 8.0, 7.8, 7.9, 8.0, 7.5] >>> ["{0:.2f}".format(x) for x in exponentially_smoothed(values)] ['8.20', '8.19', '8.17', '8.13', '8.11', '8.10', '8.04'] """ def exponentially_smoothed(numbers): """Generate a smoothed sequence for the given numbers. """ # TODO: # Fill in the implementation: yield the first value # directly, then compute smoothed values by adding # 10% of the difference between the current # measurement and the previous smoothed value, thus: # smoothed += 0.1 * (value - smoothed) if __name__ == '__main__': if not _testmod().failed: print "Success!"

"""Function Objects It's time for another short detour into language concepts. Functions in Python are just a form of data like everything else. They can be assigned to variables, created and returned from other functions, etc. This can help us solve some otherwise thorny problems in a clean way. When you define a function _inside_ of another one, this is called a **closure**. It is special because it can not only see variables that are defined inside of it, it can also see variables in the _enclosing function scope_. And since it is created every time the outer function is called, you can use this to create new custom functions on demand. Look at the example code in the code window. Take a look at how |make_stuff_printer| defines an inner function, and then *returns* it. We then assign it to a variable, and by putting |()| after it, we *call* it. Note that you can't actually change the assignment of outer variables in Python 2 unless they're global. You can in Python 3 using the |nonlocal| keyword, but in Python 2 you have to resort to hackery like assigning to outer list elements. That was foreshadowing, in case you missed it. Exercises - Study the example code, see if you can predict what it will do, then run it. - Try calling |p()| _twice_ inside of the last |for| loop. What does it do? Why? """ # This is a function that returns another function. def make_stuff_printer(stuff): # The inner function has access to the "stuff" # variable passed into the outer function. def stuff_printer(): print stuff # Functions are just objects. If we don't call it, # it's just another thing to pass around. return stuff_printer # Create and call a new function. s = make_stuff_printer("What stuff?") # s is now a function, created by calling # make_stuff_printer. s() # Let's create a bunch of them. printers = [] for x in range(10): printers.append(make_stuff_printer("stuff %d" % x)) # Now we have a list of functions, all of which will # output something different. 
print printers # Let's call them all and see if they remember the # state of the world when they were created. for p in printers: p()

"""Mutable Closure Variables This is fairly advanced, and might take a little bit of head scratching to understand. Take your time, it's worth it. If you can understand what this code is doing, then you definitely understand scoping in Python. Let's use our new smoothing generator to change all of our |(date, measurement)| pairs into |(date, measurement, smoothed)| triples. Again, we will use a generator. Yes, they are that useful. You will therefore see them everywhere you look, including where there are no generators. They are like Python's hammer - now go find a nail. A quick note: the test for monotonicity (increasing dates) has been folded into the clean_lines function. Take a look if you're interested: this works because dates of the form yyyy-mm-dd sort properly as strings. Exercises - Implement the smoothed_data function (see |TODO|) to accept an iterable over (date, measurement) pairs, and produce a (date, measurement, smoothed) triple for each one. Note: this is not simple, because the smoothing generator only expects an iterable over raw data. We could just change parsed_measurements to emit triples, but this will provide good closure practice. Hint: What happens to the old date and measurement values at each iteration through the loop? Can you use them outside? """ __doc__ = """Augmenting data smoother. Here's the test to make pass: >>> lines = ''' ... 2012-01-01 7.6 ... 2012-01-02 7.7 ... 2012-01-03 7.5 ... 2012-01-04 7.3 ... 2012-01-05 7.4 ... '''.split('\\n') >>> for triple in smoothed_data(parsed_measurements(lines)): ... 
print [str(x) for x in triple] ['2012-01-01 00:00:00', '7.6', '7.6'] ['2012-01-02 00:00:00', '7.7', '7.61'] ['2012-01-03 00:00:00', '7.5', '7.599'] ['2012-01-04 00:00:00', '7.3', '7.5691'] ['2012-01-05 00:00:00', '7.4', '7.55219'] """ import datetime def smoothed_data(pairs): """Accepts pairs of values and produces an iterator over triples.""" last_values = [None, None] def saved_values_iter(): for d, w in pairs: last_values[:] = [d, w] # Save current data in outer scope before yield. yield w for smoothed in exponentially_smoothed(saved_values_iter()): # TODO: # Replace the yield below with a proper # implementation. The goal is to yield date, # measurement, and smoothed all at the same time. # But as we consume the exponentially_smoothed # iterator, it also consumes the # saved_values_iterator, so we have to get # date and measurement from somewhere else... yield None, None, None def exponentially_smoothed(numbers): it = iter(numbers) smoothed = next(it) yield smoothed for n in it: smoothed += 0.1 * (n - smoothed) yield smoothed def parsed_measurements(lines): for line in clean_lines(lines): d, w = line.split() yield datetime.datetime.strptime(d, '%Y-%m-%d'), float(w) def clean_lines(lines): last_date = "" for i, line in enumerate(lines): line = line.strip() if not line or line.startswith('#'): continue date = line.split()[0] if date <= last_date: raise ValueError( "Non-incrementing (%d):\n\t%s\n\t%s" % (i, last_line, line)) last_date = date yield line if __name__ == '__main__': if not _testmod().failed: print "Success!"

"""Comprehensions So far, we have (almost) always used |for| and |yield| to transform sequences. We'll keep doing that, but now in a slightly different, more compact form: **list comprehensions**. Consider the |get2| function in the code. Given an iterable over sequences (like a list of tuples or strings), it produces an iterator over two specified pieces of each sequence, and it does it lazily, outputting and consuming only one element at a time. It's pretty easy to understand. But, we can write this even more clearly and succinctly as a comprehension. Comprehensions can get pretty complex (and if you find that yours are, just stop and use a loop - you'll thank yourself later), but the most common form is pretty clear and easy to grasp: [new_item for item in iterable if condition] This creates a list from items in |iterable|, optionally filtering elements out that don't pass the |if| condition. There is a **generator comprehension** version of this, too, using |()| instead of |[]|, and lazily compues its output just like regular generators. When generator comprehensions are the only argument to a function, the parentheses can be dropped, making them easier to read, as in the |sorted| example in the code. Run it and see what it's doing. """ def get2(iterable, idx1, idx2): for val in iterable: yield val[idx1], val[idx2] a = [('a','b','c'), ('d','e','f'), ('j','k','l'), ('g','h','i'), ('m','n','o')] print "first two" print list(get2(a, 0, 1)) print "first and last" print list(get2(a, 0, 2)) # List comprehension print "first two - comprehension" print [(x, y) for x, y, z in a] print "first and last - filtered" print [(x, z) for x, y, z in a if x < 'j'] # Generator comprehension print "raw generator", ((x, y) for x, y, _ in a) # Look, Ma! No (additional) parentheses! print sorted((x, y) for x, y, _ in a)

"""Make Chart Data Let's do something fun, now. We'll use the (now deprecated, of course) Google Image Charts to generate a graph from our smoothed data. http://developers.google.com/chart/image/docs/data_formats To do this, we'll have our Python code produce a URL of a chart image. Note that the code is shorter because date strings are actually fine for our needs, the smoothing implementation is folded into the parser, and some well-placed comprehensions replace a few lines of logic. It's a good idea to take some time to understand the changes, particularly since some of them are _bad exmaples_, in that they make clear code much less clear by trying to do too much on one line. You are already equipped to understand all of it, but may need a moment to noodle it all precisely because these are examples of not-so-readable Python. Remember when you code, you are writing for a human audience as well as a computer. To generate a chart URL, we need to provide values in a comma-separated list with some additional parameters. The basic parameters are provided in the main() function, as are the basic data lines. Exercises - Fill in the part marked TODO, then paste the URL into your browser. """ __doc__ = """Pass these tests: >>> data = ''' ... 2012-01-01 8.5 ... 2012-01-02 8.4 ... 2012-01-03 8.1 ... 2012-01-04 8.3 ... 2012-01-05 8.0 ... 2012-01-06 7.9 ... '''.split('\\n') >>> make_chart_url_data(parsed_measurements(data)) '8.50,8.40,8.10,8.30,8.00,7.90|8.50,8.49,8.45,8.44,8.39,8.34' """ def make_chart_url_data(data): """Create chart URL data from (date, measurement, smoothed) triples.""" # TODO: # Data format is n,n,n,n,n|n,n,n,n,n,n # Where | separates different plots, and 'n' is a # value within a plot. # # return two plot sequences, one for # measurements and the other for smoothed measurements. # Note that this will require storage of some kind. 
# You can either store the data as a list and iterate # over it twice, or you can incrementally create two # lists while iterating over it once. # # Note that you can assign some of the pieces of each # item to _ to show you are ignoring them. # # Make use of str.join (where the string is the # separator), e.g., # ','.join("{0:.2f}".format(x) for x in some_sequence) # This joins the sequence in the argument with the # string on which join is called. def parsed_measurements(lines): splits = ((d, float(w)) for d, w in (x.split() for x in clean_lines(lines))) d, w = next(splits) smoothed = w yield d, w, smoothed for d, w in splits: smoothed += 0.1 * (w - smoothed) yield d, w, smoothed def clean_lines(lines): lines = (y for y in (x.strip() for x in lines) if y and y[0] != '#') last_line = "" for i, line in enumerate(lines): if line <= last_line: raise ValueError( "Non-incrementing (%d):\n\t%s\n\t%s" % (i+1, last_line, line)) last_line = line yield line if __name__ == '__main__': if not _testmod().failed: print "Success!" print ("http://chart.googleapis.com/chart?chs=320x200&cht=lc&chds=a&chd=t:" + make_chart_url_data(parsed_measurements(["2012-01-01 8.5", "2012-01-02 8.1", "2012-01-03 7.5", "2012-01-04 8.0", "2012-01-05 7.6", "2012-01-06 7.7"])))

"""Regular Expressions Python, like most other languages these days, has **regular expression** facilities, but not built into the language. If you don't know what regular expressions are, that's a topic all by itself, so we'll only be covering the barest of the basics here to show how to use them in Python. More info can be found here: http://docs.python.org/2/howto/regex.html To use regular expressions, you import the |re| module. You then have access to all of its functions, like |search|, |match|, and |sub|. There are many others. Note that |match| almost _never_ does what people think it should, so ignore it: |search| always works fine. You can also **compile** your regular expressions and use them pre-built. This can be more efficient, and it allows some of their parameters to be specified outside of the expression, like |IGNORECASE| instead of |(?i)|. It also makes it easier to remember parameter order for functions like |search| and |sub|. Note that we introduced a new kind of string here, called a **raw string**. This is a string specified with |r| in front of it, e.g., |r"I'm \\raw"|. Raw strings make the |\\| have no special meaning, so you'll see them used all the time with regular expressions, and you should adopt this practice as well. """ import re # When finding things using regular expressions, either # None or a match object is returned. Since None # evaluates to False in boolean contexts, you can do # things like this: if re.search(r"(?i)kittens", "Kittens on YouTube."): print "Kittens found!" # Match objects also contain information about the # search, like which groups matched where, etc. # Here is an alternative approach that first compiles # the regex and then uses it to extract group # information. 
expr = re.compile(r"^kittens (.*)$", re.IGNORECASE) match = expr.search("Kittens on YouTube.") print match.groups() # Note that we preface all pattern strings with the # letter 'r' because raw strings are best for regular # expression patterns, because they tend to be # backslash-heavy. print re.sub(r"(?i)(\s|.t)", "", "Kittens on YouTube") # With date strings: m = re.search(r"^(\d{4})-(\d{2})-(\d{2})$", "2012-10-31") print m.groups() # Just the year (groups are 1-based when accessed this # way): print m.group(1)

"""Chart Date Labels There are no labels on our chart, just lines. Let's add some labels. This will give us a chance to use regular expressions a bit to parse our date strings. We'll just omit the year and month, placing only the day on the chart. We will also send a parameter asking the chart service to supply its own y-axis labels. A regular expression that you might try is '^\d+-\d+-', which matches the first two date components in a string. If you |sub| that with the empty string, you'll be left with just the last part: the day. The above expression works because |'\d'| matches any digit, |'+'| means "one or more of the preceding", and |'^'| matches the beginning of the string. The |'-'| just matches itself. See http://docs.python.org/2/library/re.html for more details. Exercises - Fill in the bit marked |TODO|: get the day out of the each date string with a regular expression, then join all of the days together as specified, with the ||| character. Don't forget the URL parameters! The test will pass when you have it all right. Try running it first to get an idea of what's expected. """ __doc__ = """Make this test pass: >>> data = ''' ... 2012-01-01 8.5 ... 2012-01-02 8.4 ... 2012-01-03 8.1 ... 2012-01-04 8.3 ... 2012-01-05 8.0 ... 2012-01-06 7.9 ... '''.split('\\n') >>> print '\\n'.join(make_chart_url_data(parsed_measurements(data))) chd=t:8.50,8.40,8.10,8.30,8.00,7.90|8.50,8.49,8.45,8.44,8.39,8.34 chxt=x,y&chxl=0:|01|02|03|04|05|06| """ import re def make_chart_url_data(data): """Create chart URL data from (date, measurements, smoothed) triples. Returns: (data_string, label_string) """ data = list(data) datastr = 'chd=t:' + (','.join('%.2f' % x for _, x, _ in data) + '|' + ','.join('%.2f' % x for _, _, x in data)) # TODO: Let's generate some labels! # # The label format is # "chxt=x&chxl=0:|label|label|...|" Assign the # appropriate string to the "labelstr" variable. 
# Use a regular expression to "sub" the year and # month away, generating a string with only the # day. If you do that inside of a comprehension # of some kind, you can then use join to get the # | delimiters in there. labelstr = '' return datastr, labelstr def parsed_measurements(lines): splits = ((d, float(w)) for d, w in (x.split() for x in clean_lines(lines))) d, w = next(splits) smoothed = w yield d, w, smoothed for d, w in splits: smoothed += 0.1 * (w - smoothed) yield d, w, smoothed def clean_lines(lines): lines = (y for y in (x.strip() for x in lines) if y and y[0] != '#') last_line = "" for i, line in enumerate(lines): if line <= last_line: raise ValueError( "Non-incrementing (%d):\n\t%s\n\t%s" % (i+1, last_line, line)) last_line = line yield line if __name__ == '__main__': if not _testmod().failed: print "Success!" print ("http://chart.googleapis.com/chart?chs=320x200&cht=lc&chds=a&" + "&".join(make_chart_url_data(parsed_measurements(["2012-01-01 8.5", "2012-01-02 8.1", "2012-01-03 7.5", "2012-01-04 8.0", "2012-01-05 7.6", "2012-01-06 7.7"]))))

"""Histograms and Dictionaries Suppose we want to know how often the measurement changed in one direction or another. Let's use a dictionary. Dictionaries are good for this because you don't have to know what keys they'll have before you start. With a list, you do (unless you're just appending). We'll take the change in measurement as the key to the dictionary, and the value will be the number of times we've seen that change. A couple of quick reminders: you will need to use |in| or |not in| to check for the key's existence before you can get its value to increment it. If it isn't there, you store it, otherwise you add one to it. You will also want to use the |abs| builtin to compute the absolute value of things (since we don't care whether the difference is up or down for this application). Exercises - Your job is to fill in the implementation. Keys should be in |%.2f| (or |{:.2f}|) format, and values are the number of times we've seen the key (the absolute difference between adjacent measurements). Bonus Work - Look up |collections.defaultdict| and use that instead to save yourself some code. """ __doc__ = """Histogram example. Measurements are just floating point values. Truncate differences to at most 2 decimal places. Use the absolute value of the difference between adjacent values. >>> h = histogram([8.0, 8.2, 7.8, 7.9, 8.0, 7.7, 7.9, 7.6]) >>> print "\\n".join("%r: %r" % (k, v) for k, v in sorted(h.iteritems())) '0.10': 2 '0.20': 2 '0.30': 2 '0.40': 1 """ def histogram(data): """Given an iterable over measurements, produce a difference histogram.""" # TODO: Implement this, returning a dictionary # keyed on strings representing the absolute # difference between adjacent measurements. The # strings should be formatted {:.2f} as in the # doctest above. if __name__ == '__main__': if not _testmod().failed: print "Success!"

"""Sorting, Keys, and Lambda The histogram dictionary is nice, but it is not sorted (well, it is in the doctest). Let's talk about sorting. All lists have a |sort| method. If you call it, e.g., mylist.sort() It will sort the list in place. You can also use the builtin |sorted| function, which takes any sequence (not just a list) and produces a new sorted sequence from it. The code has some examples. Note: |sort| and |sorted| accept several optional parameters. One of the most interesting is |key|. If you create a function that produces a key given one of the values in your sequence, it will use that key to determine order instead of the value itself. For example, to sort a list of numbers backwards, the key might be described as "take the negative". In our example, we specify that function using a |lambda|. Lambdas are basically one-line functions that accept some arguments and evaluate exactly one expression, which they return, e.g.: myfunc = lambda x: x+10 myfunc(2) == 12 # True """ import collections def histogram(data): # When an item is not present, defaultdict uses # the callable you pass it to create and insert # a new value. In this case, 0. hist = collections.defaultdict(int) diter = iter(data) last = next(diter) for val in diter: hist["%.2f" % abs(val-last)] += 1 last = val return hist hist = histogram([10, 10.2, 10.4, 10.2, 10.1, 10.0, 9.5, 9.8, 8.7]) print "Raw:" print hist # Now try sorting it. print "Sorted:" for k, v in sorted(hist.items()): print "%s: %d" % (k, v) # Now try sorting it with a weird key (string reversal): print "Weirdly sorted:" for k, v in sorted(hist.items(), key=lambda x: x[0][::-1]): print "%s: %d" % (k, v)

"""Command-line arguments and flags. This particular part might require you to use the command line to really try it out, but we'll fake it a bit for you to give you an idea of what's what. When invoking your programs from the command line, you can accept arguments and do things with them. For example, you might want your tracker to accept different filenames to generate chart data for different cats. To do this, you access |sys.argv|. For optional parameters, you can (and should) also use the |argparse| module as shown. It's fairly straightforward to set up, and then you can just access things by name. Documentation can be found here: http://docs.python.org/2/library/argparse.html#module-argparse Tracker Concluded Now you can really go write that tracker program, and we'll put this particular project concept behind us in favor of moving on to more advanced things. But, you have definitely learned enough already to write useful software with the language. It might be a good idea to pause, look over the slides to this point one more time, and try writing some small programs. Or, you could just plunge ahead. There's a lot more fun to be had. """ import argparse PRETEND_COMMANDLINE = './tracker.py input.txt --dryrun -o output.url' def main(): # Describe what arguments we understand. parser = argparse.ArgumentParser(description="Track Kitty's Progress") parser.add_argument('input_file', type=str, help=("Input file name; space-separated " "date, measurement values")) parser.add_argument('-o', '--output', type=str, help="Output file name") parser.add_argument('-n', '--dryrun', action="store_true") # We can also pass in a list directly, but # called without arguments it imports sys and # uses sys.argv. args = parser.parse_args() print args print args.output print args.input_file print args.dryrun # Pay no attention to the man behind the curtain. 
if __name__ == '__main__': # Set up a fake set of arguments, pretending # that we were invoked like this: import sys sys.argv = PRETEND_COMMANDLINE.split() # Way simplistic, not fully general. main()

"""Classes are Types Let's move on to **classes**. We've been using them already without directly talking about it, so let's get down to what they really are. In general, you can think of a class as a **type**. This is, of course, merely a useful fiction because it hides subtlety, but it is still a great way to think about it, because classes allow you to create a bunch of things that are the same _kind_ or _type_ of thing. We'll learn how to make our own types in the coming slides. Calling a class makes a new **instance** of it. If you think of a class as a blueprint for, say, a house, an instance is the actual house you build by following the plan. Some basic properties of classes are demonstrated in the example code by looking at |ValueError|, which is a class we've seen and used before. You've seen a lot of other classes already, such as |list|, |tuple|, |dict|, |int|, |float|, and others. We've been referring to them as "callables", because they are, but that's because _all_ classes are callable: calling one creates an instance. """ # What is this type of thing anyway? print "What's a ValueError class?" print " ", repr(ValueError) # Make a new instance of ValueError by calling it. ex = ValueError("My super informative error message") # What is this? # Note how "repr" in this case shows you how to # make one, which can be really useful. print "What's a ValueError instance?" print " ", repr(ex) print "What (non-special) stuff is inside of it?" print " " + "\n ".join(x for x in dir(ex) if x[:2] != '__') # Now, there are various ways of getting at the # message: print "args: \t", ex.args print "message:\t", ex.message print "str: \t", str(ex) # But "str" just calls the __str__ method: print "__str__:\t", ex.__str__() # And since it has a __str__ method, print can use # it directly: print "Bare: \t", ex

"""What is a Class, Really? Everything that is a thing in Python has a class behind it. That class is like a description, telling you what its instances are _like_, what they _contain_, and what they can _do_. To create your own sort of class in Python, you use a |class| declaration as shown in the sample code. The declaration includes the name of your class, and a list of other classes that you **inherit** from. In Python 2, this is often just |object| (in Python 3 you can omit it to inherit from |object| by default): class MyClassName(object): The body of the class, like in other Python scopes, is indented below the declaration. Take a look at the sample code. There we define three **methods**: |__init__|, |__str__|, and |__repr__|. These are all _special_ methods, since they start and end with double underscores. Special methods are used by Python to do lots of things. The |__init__| method, for example, is called when an instance is created. You can see this when we create |new_shoe|: when you call a class, Python creates a boring empty instance (with methods), then passes that to |__init__| so you can fill it in with more interesting stuff. Similarly, when you call |str| or |repr| on an instance, Python will try to call its corresponding special methods. Take a look and see if you can tell how it works. """ class Shoe(object): """Class docstring - tell what this *is*.""" def __init__(self, color, lace_holes, us_size, weight_oz): """Make a new shoe with the given data.""" self.color = color self.lace_holes = lace_holes self.us_size = us_size self.weight_oz = weight_oz def __repr__(self): return "Shoe({!r}, {!r}, {!r}, {!r})".format( self.color, self.lace_holes, self.us_size, self.weight_oz) def __str__(self): # Note how we do *implicit* string # concatenation here: if two string constants # are right next to each other, they are joined. return ("A size {size} {color} shoe " "with {holes} lace holes. 
" "It weighs {weight} ounces.".format( size=self.us_size, color=self.color, holes=self.lace_holes, weight=self.weight_oz)) # "Shoe" is a class. Let's create a specific # instance of it and do stuff with it: new_shoe = Shoe("red", 10, "8.5 children's", 6) print repr(new_shoe) print new_shoe

"""More on Special Methods We did some work with the |Shoe| class previously. Let's explore that some more. Every method of a class takes |self| as its first parameter. You don't have to pass it in: Python does that for you. You can actually name it anything you want, but the universally accepted convention is to call it |self|, so you should, as well. But, more on that later. For now, let's talk about special methods. There are a *lot* of special methods you can write to change the behavior of your class. For example, if you want your class instances to be _iterable_, you can define the |__iter__| method to return an iterator. If you want it to be _indexable_ using |[]|, you would define one or more of the |__getitem__|, |__setitem__|, or |__delitem__| methods. There are ways to make instances look like numbers (e.g., |__add__|, and |__lt__|), sequences (e.g., |__nonzero__| and |__len__|), and even functions (by defining |__call__|). A full list is here: http://docs.python.org/2/reference/datamodel.html#special-method-names Exercises - Make |Shoe| iterable by adding an |__iter__| method that emits each shoe characteristic, one at a time. Print it in a |for| loop. *Hint:* if |__iter__| is a generator, calling it will return an iterator. """ class Shoe(object): """Class docstring - tell what this *is*.""" def __init__(self, color, lace_holes, us_size, weight_oz): """Make a new shoe with the given data.""" self.color = color self.lace_holes = lace_holes self.us_size = us_size self.weight_oz = weight_oz def __str__(self): return "Shoe({!r}, {!r}, {!r}, {!r})".format( self.color, self.lace_holes, self.us_size, self.weight_oz) # We can set one method to be equal to another. # TODO: try removing this and see what happens. __repr__ = __str__ # "Shoe" is a class. Let's create a specific # instance of it and do stuff with it: new_shoe = Shoe("red", 10, "8.5 children's", 6) print repr(new_shoe) print new_shoe

"""More on Self When you define a class, you can put data and methods into it. We have seen that you define methods by indenting function declarations below the class declaration, and that they are required to accept |self| as their first parameter. But what is |self|, exactly? The short version is this: |self| is the instance. So, when you do something like this s = Shoe('blue', 4, '6w', 12) s.change_color('green') It's the same as if you had done this (try it!) Shoe.change_color(s, 'green') The |self| in |change_color| is whatever |s| is holding. It's the instance of |Shoe| that we just created: the thing on the left of the dot. As we've seen, the way that you create variables inside of an instance is just like we do in all other cases in Python: we assign them. These variables do not exist before they are assigned, so in |__init__| you'll typically see a lot of variable assignments just to set things up. Note that |self| is only automatically passed in if you call the function on an **instance**. If you call it on a **class**, it is not. """ class Shoe(object): """Class docstring - tell what this *is*.""" def __init__(self, color, lace_holes, us_size, weight_oz): """Make a new shoe with the given data.""" self.color = color self.lace_holes = lace_holes self.us_size = us_size self.weight_oz = weight_oz def __str__(self): return "Shoe({!r}, {!r}, {!r}, {!r})".format( self.color, self.lace_holes, self.us_size, self.weight_oz) def change_color(self, new_color): self.color = new_color s = Shoe('blue', 4, '6w', 12) print s s.change_color('red') print s

"""Namespace Dictionaries When |__init__| is called, it is passed a fresh instance of the class, ready to have new data added to it. But what is this instance, really? In a nutshell, it's a _namespace_. Does that sound familiar? We've seen namespaces before, when we have _imported modules_. A module is one kind of namespace in Python, a class is another, and an instance is still another. In Python, namespaces are (almost) always implemented as _dictionaries_. The underlying dictionary that contains all of their data is available in the |__dict__| member of the namespace. Head over the to the code window and see what happens when you run it. Something strange has happened here, though. We can print |instance.SomeVariable|, so we might expect it to be in the instance dictionary, but it seems to be missing. It isn't there, but it *is* in the _class dictionary_. Python, when you try to access a member of an instance, will _search_ for it, starting at the instance dictionary, then if it isn't there, in the class dictionary. Exercise - Try changing something in the instance dictionary by assigning to, e.g., |instance.__dict__['random']|. Now try printing |instance.random|. What happens? """ # Three different kinds of namespaces: import string class MyTestClass(object): """My class docstring.""" SomeVariable = 'hi there' def __init__(self, arg): self.arg = arg instance = MyTestClass('some argument') # Note that instances can access class variables # directly, even if they aren't set in __init__. 
print instance.arg print instance.SomeVariable # Let's take a look inside of these, now: print "INSTANCE:------------------------------" print "\t\n".join("{0}: {1!r}".format(k, v) for k, v in instance.__dict__.iteritems()) print "CLASS:---------------------------------" print "\t\n".join("{0}: {1!r}".format(k, v) for k, v in MyTestClass.__dict__.iteritems()) print "MODULE:--------------------------------" print "\t\n".join("{0}: {1!r}".format(k, v) for k, v in string.__dict__.iteritems())

"""What Next? You have covered all of the really necessary basic parts of Python, and that is a lot. Well done! So, what do you do next? Depending on how comforable you are with this material, you may want to go back through the slides one more time, just to cement things in your mind. Of course, another next step could be to explore the more detailed official online Python tutorial: http://docs.python.org/2/tutorial/ Of particular interest might be a tour of Python's standard library: http://docs.python.org/2/tutorial/stdlib.html Meanwhile, there's a fun little program in the code window that you are welcome to play with at your leisure. There are a couple of new concepts hiding in there, like **decorators** (things starting with |@| that transform one function into another), so feel free to look them up if you're curious. Other than the decorators, though (of which only |@classmethod| and |@staticmethod| are used), you are, with a little time and head-scratching, completely equipped to understand what is happening here! Welcome to Python! """ __doc__ = """Sudoku solver, inspired by Peter Norvig. http://norvig.com/sudoku.html """ import random import re from math import sqrt __author__ = "Chris Monson <shiblon@gmail.com>" def main(): board = SudokuBoard.fromstring( """ .43 ... 62. 7.. 4.3 ..8 6.. 2.8 ..7 .75 ... 34. ... ... ... .98 ... 57. 9.. 5.7 ..3 1.. 6.2 ..5 .87 ... 26. """) print "Solution:" print board.search().pretty_str() class SudokuBoard(object): """Defines a Sudoku board, so we can solve one.""" def __init__(self): """Creates an empty sudoku board, with all squares unconstrained. All boards are assumed to be standard 9x9 boards. We could do better, but we don't bother for this class. 
""" self.square_size = 3 # large squares on a side self.size = self.square_size**2 # squares on a side numbers = self.numbers = tuple(xrange(1, self.size + 1)) rows = self.rows = range(self.size) cols = self.cols = range(self.size) self.values = dict(((r,c), numbers) for r in rows for c in cols) self.number_strings = '.' + ''.join(str(x) for x in self.numbers) @staticmethod def normalize_puzzle_string(string): """Remove superfluous fluff from a sudoku string and prepare it for import >>> SudokuBoard.normalize_puzzle_string('..-+5..__4.52230.30') '..5....4.5223..3.' """ string = re.sub(r"[\s|+-]+", "", string) string = re.sub(r"[0_]", ".", string) return string @classmethod def fromstring(cls, string): """Accepts a simple sudoku puzzle string in row-major format. [\s-_+] are all ignored, so it can be formatted in ascii art args: string: a string representing a puzzle """ string = cls.normalize_puzzle_string(string) size = int(sqrt(len(string))) square_size = int(sqrt(size)) if size**2 != len(string) or square_size**2 != size: raise ValueError("Invalid input string length: %d" % len(string)) # TODO: remove this constraint for larger puzzles: if square_size != 3: raise ValueError("Code currently only supports 9x9 puzzles") self = cls() # Fill in the cells at the places that are specified in the string for coords, char in zip(self.cells(), string): if char != '.': self.assign_value(coords, int(char)) return self def copy(self): """Return a copy of this puzzle""" new = self.__class__() new.values = self.values.copy() return new def search(self): """Searches the puzzle for a solution, returning a *new* puzzle. Returns False if it fails. This method always searches for the most constrained cell with no fewer than two values. Then it tries one. Calls eliminate_value, assign_value, and itself recursively. 
""" best_coords = None for coords in self.cells(): size = len(self[coords]) if size == 1: continue elif size == 0: return False elif best_coords is None or size < len(self[best_coords]): best_coords = coords if best_coords is None: return self possible_values = list(self[best_coords]) random.shuffle(possible_values) for val in possible_values: new_puzzle = self.copy() if new_puzzle.assign_value(best_coords, val): result = new_puzzle.search() if result: return result return False def eliminate_value(self, coords, killval): """Removes killval from cell at coords and propagates constraints in place. Propagates constraints, in the following way: - If the value is not in the specified cell, do nothing. - If the elimination results in a singleton, recursively eliminate that singleton from all peer cells. - If, after doing the recursive elimination, the eliminated value is only found in one cell in any given unit, eliminate it from all of that cell's peers. In other words, if I eliminate 3 from a cell, and after that's done I find that '3' is only in one cell in that row, then eliminate '3' from all cells in that row. - If at any time the number of values in a cell goes to zero, this is not a valid solution, so we return False. args: coords: (row, col) of cell to adjust killval: the value to be removed from this cell returns: False if the elimination results in an invalid puzzle, else True. """ if killval not in self[coords]: return True # Take the value out self[coords] = tuple(x for x in self[coords] if x != killval) cellvals = self[coords] if len(cellvals) == 0: return False elif len(cellvals) == 1: # This is now fully assigned - go ahead and kill it from all peers assigned_val = cellvals[0] for peer in self.peers_for_cell(coords): if not self.eliminate_value(peer, assigned_val): return False # Now check whether the eliminated value is uniquely found in any cell in # any unit. 
for unit in self.units_for_cell(coords): unit = list(unit) cells_with_killval = tuple(c for c in unit if killval in self[c] and c != coords) if len(cells_with_killval) == 1: if not self.assign_value(cells_with_killval[0], killval): return False return True def assign_value(self, coords, goodval): """Assigns a value to cell at coords and propagates constraints in place. Implemented using eliminate_value. """ cellvals = self[coords] for v in cellvals: if v != goodval: if not self.eliminate_value(coords, v): return False return True def __getitem__(self, key): return self.values[key] def __setitem__(self, key, val): self.values[key] = val def __len__(self): return len(self.values) def cells(self): """Returns a row-major iterator over all coordinates in the puzzle >>> list(SudokuBoard().cells())[3:12] [(0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (1, 0), (1, 1), (1, 2)] """ return ((row, col) for row in self.rows for col in self.cols) def row_for_cell(self, coords, include_self=False): """Iterator over all cells in this cell's row args: coords: (row, col) of this cell include_self: If True, includes given coordinates in output >>> s = SudokuBoard() >>> list(c for c in s.row_for_cell((5,2))) [(5, 0), (5, 1), (5, 3), (5, 4), (5, 5), (5, 6), (5, 7), (5, 8)] >>> list(c for c in s.row_for_cell((5,2), include_self=True)) [(5, 0), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (5, 6), (5, 7), (5, 8)] """ row, col = coords return ((row, c) for c in self.cols if include_self or c != col) def col_for_cell(self, coords, include_self=False): """Iterator over cells in the column containing the given coordinates args: coords: (row, col) of the cell whose column will be returned include_self: If True, includes given coordinates in output returns: iterator over (row, col) tuples for this column >>> s = SudokuBoard() >>> list(c for c in s.col_for_cell((3,5))) [(0, 5), (1, 5), (2, 5), (4, 5), (5, 5), (6, 5), (7, 5), (8, 5)] >>> list(c for c in s.col_for_cell((3,5), include_self=True)) [(0, 
5), (1, 5), (2, 5), (3, 5), (4, 5), (5, 5), (6, 5), (7, 5), (8, 5)] """ row, col = coords return ((r, col) for r in self.rows if include_self or r != row) def square_bounds(self, coords): """Returns the corners of the square containing this cell. The "upper left" is inclusive, the "lower right" is exclusive >>> SudokuBoard().square_bounds((4, 3)) ((3, 3), (6, 6)) >>> SudokuBoard().square_bounds((2, 6)) ((0, 6), (3, 9)) """ # There are square_size squares of side square_size on a side # (e.g. 3 squares of side-length 3 on a side) row, col = coords r_from = row - (row % self.square_size) r_to = r_from + self.square_size c_from = col - (col % self.square_size) c_to = c_from + self.square_size return (r_from, c_from), (r_to, c_to) def square_for_cell(self, coords, include_self=False): """Iterator over cells in the square containing the given coordinates args: coords: (row, col) of cell in square include_self (False): If true, the given coordinates are included in the iteration returns: iterator over (row, col) coordinate tuples >>> s = SudokuBoard() >>> list(c for c in s.square_for_cell((1, 1))) [(0, 0), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1), (2, 2)] >>> list(c for c in s.square_for_cell((8, 7), include_self=True)) [(6, 6), (6, 7), (6, 8), (7, 6), (7, 7), (7, 8), (8, 6), (8, 7), (8, 8)] """ (r_from, c_from), (r_to, c_to) = self.square_bounds(coords) for r in range(r_from, r_to): for c in range(c_from, c_to): if (r, c) != coords or include_self: yield r, c def units_for_cell(self, coords, include_self=False): """Iterator over row, column, and square units containing the given cell. args: coords: (row, col) of the cells whose units we wish to obtain include_self: If true, includes this cell in the output returns: iterator over iterators, in the following order: row col square >>> s = SudokuBoard() >>> for unit in s.units_for_cell((1,2)): ... 
list(unit) [(1, 0), (1, 1), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8)] [(0, 2), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (7, 2), (8, 2)] [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)] >>> for unit in s.units_for_cell((1,2), include_self=True): ... list(unit) [(1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8)] [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (7, 2), (8, 2)] [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)] """ # Output the row iterator. yield self.row_for_cell(coords, include_self=include_self) # Output the column iterator yield self.col_for_cell(coords, include_self=include_self) # Output the square iterator yield self.square_for_cell(coords, include_self=include_self) def peers_for_cell(self, coords, include_self=False): """Iterator over coordinates of all peers of this cell. All values show up exactly once. >>> peers = list(SudokuBoard().peers_for_cell((5, 8))) >>> peers[:8] [(5, 0), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (5, 6), (5, 7)] >>> peers[8:16] [(0, 8), (1, 8), (2, 8), (3, 8), (4, 8), (6, 8), (7, 8), (8, 8)] >>> peers[16:] [(3, 6), (3, 7), (4, 6), (4, 7)] """ for c in self.row_for_cell(coords, include_self=include_self): yield c for c in self.col_for_cell(coords, include_self=False): yield c for c in self.square_for_cell(coords, include_self=False): if c[0] != coords[0] and c[1] != coords[1]: yield c def simple_cell_string(self, values): """Returns the simple string value of this cell, '.' for not fully assigned >>> s = SudokuBoard() >>> s.simple_cell_string((1,2,3)) '.' >>> s.simple_cell_string((2,)) '2' >>> s.simple_cell_string(()) '!' """ if len(values) == 0: return '!' elif len(values) == 1: return self.number_strings[values[0]] else: return '.' def simple_cell_strings(self): """Row-major iterator over cell string values. 
>>> s = SudokuBoard() >>> len(tuple(s.simple_cell_strings())) 81 >>> tuple(s.simple_cell_strings())[:12] ('.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.') >>> s[1,1] = (3,) >>> tuple(s.simple_cell_strings())[:12] ('.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '3', '.') """ return (self.simple_cell_string(self[r, c]) for r, c in self.cells()) def pretty_str(self): """Produce a nice-looking representation of the board. Only shows *fully constrained* values. Cells that are not fully defined show up as '.', as in simple_cell_strings. """ def row_at_a_time(): strs = list(self.simple_cell_strings()) rowstrs = [] for r in range(self.size): row = ''.join(strs[r*self.size:(r+1)*self.size]) pieces = [] for c in range(self.square_size): pieces.append(row[c*self.square_size:(c+1)*self.square_size]) yield ' '.join(pieces) if (r + 1) % self.square_size == 0: yield '' return '\n'.join(row_at_a_time()) def __str__(self): def format_cell(values): return "".join(self.number_strings[(v in values) * v] for v in self.numbers) def columns(row): return (format_cell(self[row, c]) for c in self.cols) return "\n".join(" ".join(columns(r)) for r in self.rows) def __repr__(self): return "%s.fromstring('%s')" % ( self.__class__.__name__, "".join(self.simple_cell_strings())) if __name__ == "__main__": main() #_testmod()

Interactive Python 2 Tutorial ▲▼ TOC

{{tutorial.title}}