Exercise Answers¶

The code cells below are example answers to the workshop exercises. They are useful if you get stuck and need a hint, or if you want to compare them with your own attempts.

1.1) Use unpacking for pretty printing¶

In [ ]:

Copied!

counties = ["Anoka", "Dakota", "Carver", "Hennepin", "Ramsey", "Scott", "Washington"]
print(*counties, sep='\n')
counties = ["Anoka", "Dakota", "Carver", "Hennepin", "Ramsey", "Scott", "Washington"]
print(*counties, sep='\n')

Anoka
Dakota
Carver
Hennepin
Ramsey
Scott
Washington

1.2) Use try/except¶

In [ ]:

Copied!





from math import inf
from typing import NamedTuple

class Record(NamedTuple):
    """Tuple-based record with two named integer fields.

    Being a NamedTuple, instances are immutable and the field names are
    available, in declaration order, through ``Record._fields``.
    """
    total_population: int
    population_in_poverty: int

record1 = Record(5000, 200)
record2 = Record(200, 0)

for field in Record._fields:
    try:
        ratio = getattr(record1, field) / getattr(record2, field)
    except ZeroDivisionError:
        ratio = inf
    print(ratio)
from math import inf
from typing import NamedTuple

class Record(NamedTuple):
    """Tuple-based record with two named integer fields.

    Being a NamedTuple, instances are immutable and the field names are
    available, in declaration order, through ``Record._fields``.
    """
    total_population: int
    population_in_poverty: int

record1 = Record(5000, 200)
record2 = Record(200, 0)

for field in Record._fields:
    try:
        ratio = getattr(record1, field) / getattr(record2, field)
    except ZeroDivisionError:
        ratio = inf
    print(ratio)

25.0
inf

1.3) Use standard library data classes¶

In [ ]:

Copied!





from dataclasses import dataclass

@dataclass
class Record:
    """Record with two integer fields, generated by ``@dataclass``.

    The decorator is used with its defaults, so fields can be reassigned
    after construction and a readable ``repr`` is generated automatically.
    """
    total_population: int
    population_in_poverty: int

record = Record(5000, 200)
record.total_population = 6000
print(record)
from dataclasses import dataclass

@dataclass
class Record:
    """Record with two integer fields, generated by ``@dataclass``.

    The decorator is used with its defaults, so fields can be reassigned
    after construction and a readable ``repr`` is generated automatically.
    """
    total_population: int
    population_in_poverty: int

record = Record(5000, 200)
record.total_population = 6000
print(record)

Record(total_population=6000, population_in_poverty=200)

1.4) Use the built-in min and max functions¶

In [ ]:

Copied!

from random import randint

nums = [randint(-1000, 1000) for i in range(20)]

print(max(nums), min(nums))
from random import randint

nums = [randint(-1000, 1000) for i in range(20)]

print(max(nums), min(nums))

891 -899

1.5) Open a file with a context manager¶

In [ ]:

Copied!

with open("exercise.txt", "w") as f:
    f.write("This is example text for an exercise.")
with open("exercise.txt", "w") as f:
    f.write("This is example text for an exercise.")

2.1) Tuple-based storage¶

In [1]:

Copied!





import sys

def tupleFromRange(r1, r2):
  """Return a tuple of consecutive integers from r1 through r2, inclusive."""
  # range() excludes its stop value, so step one past r2 to include it.
  inclusive_stop = r2 + 1
  return tuple(range(r1, inclusive_stop))

start = 1900
end = 2030

studyYears = tupleFromRange(start, end)

print(studyYears)
print("Bytes used: ", sys.getsizeof(studyYears))
import sys

def tupleFromRange(r1, r2):
  """Return a tuple of consecutive integers from r1 through r2, inclusive."""
  # range() excludes its stop value, so step one past r2 to include it.
  inclusive_stop = r2 + 1
  return tuple(range(r1, inclusive_stop))

start = 1900
end = 2030

studyYears = tupleFromRange(start, end)

print(studyYears)
print("Bytes used: ", sys.getsizeof(studyYears))

(1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030)
Bytes used:  1088

2.2) Set-based look-up¶

In [2]:

Copied!





placeNames_list = ["Kinshasa", "Duluth", "Uruguay", "Doherty Residence", "Dinkytown", "Khazad-dûm"]
placeNames_set = set(placeNames_list)

# Set look-up
if "Dinkytown" not in placeNames_set:
    print("Missing.")  # O(1) look-up
placeNames_list = ["Kinshasa", "Duluth", "Uruguay", "Doherty Residence", "Dinkytown", "Khazad-dûm"]
placeNames_set = set(placeNames_list)

# Set look-up
if "Dinkytown" not in placeNames_set:
    print("Missing.")  # O(1) look-up

2.3) Generator expression¶

In [3]:

Copied!





import random
import string

# Input dataset: A list of random strings. Each string is 8 letters long.
randomStrings = [''.join(random.choices(string.ascii_letters, k=8)) for i in range(10)]
print(randomStrings)

# Convert all strings to lowercase
lowerCase_gen = (x.lower() for x in randomStrings)
for x in lowerCase_gen:
  print(x)
import random
import string

# Input dataset: A list of random strings. Each string is 8 letters long.
randomStrings = [''.join(random.choices(string.ascii_letters, k=8)) for i in range(10)]
print(randomStrings)

# Convert all strings to lowercase
lowerCase_gen = (x.lower() for x in randomStrings)
for x in lowerCase_gen:
  print(x)

['qwUYHnBg', 'yypGsKIi', 'bRVmubKp', 'RbEdRWKF', 'zCRwzvBG', 'swaWiWNs', 'gXndKiBa', 'acRnakiC', 'kuElpvqh', 'fvyfTINZ']
qwuyhnbg
yypgskii
brvmubkp
rbedrwkf
zcrwzvbg
swawiwns
gxndkiba
acrnakic
kuelpvqh
fvyftinz

2.4) Generator¶

In [4]:

Copied!





# The list that each dataset will be compared to.
primary = [4, 7, 140, 55, 7, 91, 6]

# Input datasets
inputs = (
 [0, 3, 40, 55, 6, 98, 4],
 [5, 4, 3, 45, 1, 67, 2],
 [7, 150, 0.5, 1]
 )

def matchingStructure_gen(inputsList, primList):
  """
  This generator compares the length of each input collection to the primary
  list. For every input that matches in length, it yields a new list holding
  the element-wise difference `primary - input`.

  Inputs whose length differs from the primary list are skipped.
  """
  for item in inputsList:
    if len(item) == len(primList):
      # Subtract each input value from the primary value at the same position.
      differences = [prim - value for value, prim in zip(item, primList)]
      yield differences

for item in matchingStructure_gen(inputs, primary):
  print(item)
# The list that each dataset will be compared to.
primary = [4, 7, 140, 55, 7, 91, 6]

# Input datasets
inputs = (
 [0, 3, 40, 55, 6, 98, 4],
 [5, 4, 3, 45, 1, 67, 2],
 [7, 150, 0.5, 1]
 )

def matchingStructure_gen(inputsList, primList):
  """
  This generator compares the length of each input collection to the primary
  list. For every input that matches in length, it yields a new list holding
  the element-wise difference `primary - input`.

  Inputs whose length differs from the primary list are skipped.
  """
  for item in inputsList:
    if len(item) == len(primList):
      # Subtract each input value from the primary value at the same position.
      differences = [prim - value for value, prim in zip(item, primList)]
      yield differences

for item in matchingStructure_gen(inputs, primary):
  print(item)

[4, 4, 100, 0, 1, -7, 2]
[-1, 3, 137, 10, 6, 24, 4]

2.5) Compare differences in speed using `timeit`¶

2.5.1) `%timeit` line magic¶

In [5]:

Copied!

%timeit [i for i in range(50) if i % 2 == 0]
%timeit (i for i in range(50) if i % 2 == 0)
%timeit [i for i in range(50) if i % 2 == 0]
%timeit (i for i in range(50) if i % 2 == 0)

4.2 µs ± 833 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
505 ns ± 19.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

2.5.2) `%%timeit` cell magic¶

In [6]:

Copied!





%%timeit -n 1 -r 1

employeeDatabase = [
  {'lastName': 'Knope', 'rate': 72000, 'pay_class': 'annual'},
  {'lastName': 'Gergich', 'rate': 17, 'pay_class': 'hourly'},
  {'lastName': 'Ludgate', 'rate': 60000, 'pay_class': 'annual'},
  {'lastName': 'Swanson', 'rate': 'redacted', 'pay_class': 'redacted'},
  {'lastName': 'Haverford', 'rate': 52000, 'pay_class': 'annual'}
]

def hourly_rate(payments):
  """Return a list with the hourly rate of each worker paid an annual salary.

  Only entries whose 'pay_class' is 'annual' are included; every other entry
  (including those without a 'pay_class' key) is skipped.
  """
  # 2080 is presumably 52 weeks x 40 hours per year -- confirm if it matters.
  return [
    entry['rate'] / 2080
    for entry in payments
    if entry.get('pay_class') == 'annual'
  ]

# Sum hourly rates for those receiving an annual salary.
salariesPerHour = sum(hourly_rate(employeeDatabase))

print(f"Total dispersments per hour for salaried employees: ${salariesPerHour:.2f}")
%%timeit -n 1 -r 1

employeeDatabase = [
  {'lastName': 'Knope', 'rate': 72000, 'pay_class': 'annual'},
  {'lastName': 'Gergich', 'rate': 17, 'pay_class': 'hourly'},
  {'lastName': 'Ludgate', 'rate': 60000, 'pay_class': 'annual'},
  {'lastName': 'Swanson', 'rate': 'redacted', 'pay_class': 'redacted'},
  {'lastName': 'Haverford', 'rate': 52000, 'pay_class': 'annual'}
]

def hourly_rate(payments):
  """Return a list with the hourly rate of each worker paid an annual salary.

  Only entries whose 'pay_class' is 'annual' are included; every other entry
  (including those without a 'pay_class' key) is skipped.
  """
  # 2080 is presumably 52 weeks x 40 hours per year -- confirm if it matters.
  return [
    entry['rate'] / 2080
    for entry in payments
    if entry.get('pay_class') == 'annual'
  ]

# Sum hourly rates for those receiving an annual salary.
salariesPerHour = sum(hourly_rate(employeeDatabase))

print(f"Total dispersments per hour for salaried employees: ${salariesPerHour:.2f}")

Total dispersments per hour for salaried employees: $88.46
79.4 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

2.6) Check for speed bottlenecks in detail with `cProfile`¶

In [7]:

Copied!





import cProfile

with cProfile.Profile() as pr:
  dataList = [x for x in range(1, 10_000_000)]
  dataTuple = tuple(x for x in range(1, 10_000_000))

  listFromList = []
  listFromTuple = []

  for item in dataList:
    new = item + 1
    listFromList.append(new)

  for item in dataTuple:
    new = item + 1
    listFromTuple.append(new)

  pr.print_stats('cumtime')
import cProfile

with cProfile.Profile() as pr:
  dataList = [x for x in range(1, 10_000_000)]
  dataTuple = tuple(x for x in range(1, 10_000_000))

  listFromList = []
  listFromTuple = []

  for item in dataList:
    new = item + 1
    listFromList.append(new)

  for item in dataTuple:
    new = item + 1
    listFromTuple.append(new)

  pr.print_stats('cumtime')

         30000008 function calls in 4.517 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
 10000000    1.981    0.000    1.981    0.000 <ipython-input-7-233c72c0aca7>:5(<genexpr>)
 19999998    1.670    0.000    1.670    0.000 {method 'append' of 'list' objects}
        1    0.866    0.866    0.866    0.866 <ipython-input-7-233c72c0aca7>:4(<listcomp>)
        1    0.000    0.000    0.000    0.000 cProfile.py:41(print_stats)
        1    0.000    0.000    0.000    0.000 pstats.py:108(__init__)
        1    0.000    0.000    0.000    0.000 pstats.py:118(init)
        1    0.000    0.000    0.000    0.000 pstats.py:137(load_stats)
        1    0.000    0.000    0.000    0.000 cProfile.py:51(create_stats)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.len}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.hasattr}

2.7) Stretch goal: Raster generator¶

In [8]:

Copied!





# # # Exercise solution, version 1 # # #
import numpy as np

# Starting dataset: 80x80 grid of people per 500 m x 500 m cell.
highResPop = np.ones((80, 80)) * 5

def densityKM_gen(popArray):
    """
    Generator that yields rows of aggregated population values from a
    higher-resolution 2D population array.

    The array is divided into square groups of group_size x group_size cells
    and the values inside each group are summed. Rows or columns left over
    after the last complete group are ignored, so the input dimensions do not
    have to be exact multiples of group_size.

    Input:  2D array (500x500m cells in the workshop dataset)
    Output: Each yield output is a 1D array representing one row of group sums.
    """
    # NOTE(review): 20 cells of 500 m span 10 km per side, not 1 km -- confirm
    # that 20 is the intended aggregation factor for "per km²" output.
    group_size = 20
    rows, cols = popArray.shape

    # Number of complete groups that fit along each axis.
    row_blocks = rows // group_size
    col_blocks = cols // group_size

    # Drop any partial trailing group; reshape would otherwise raise
    # ValueError for shapes that are not a multiple of group_size.
    trimmed = popArray[:row_blocks * group_size, :col_blocks * group_size]

    # Aggregate: axes 1 and 3 index the cells inside each group.
    kmArray = trimmed.reshape(row_blocks, group_size, col_blocks, group_size)

    # Sum over each group
    kmDensity = kmArray.sum(axis=(1, 3))

    # Hand back one aggregated row (a 1D array) at a time.
    yield from kmDensity


for row in densityKM_gen(highResPop):
    print(row)
# # # Exercise solution, version 1 # # #
import numpy as np

# Starting dataset: 80x80 grid of people per 500 m x 500 m cell.
highResPop = np.ones((80, 80)) * 5

def densityKM_gen(popArray):
    """
    Generator that yields rows of aggregated population values from a
    higher-resolution 2D population array.

    The array is divided into square groups of group_size x group_size cells
    and the values inside each group are summed. Rows or columns left over
    after the last complete group are ignored, so the input dimensions do not
    have to be exact multiples of group_size.

    Input:  2D array (500x500m cells in the workshop dataset)
    Output: Each yield output is a 1D array representing one row of group sums.
    """
    # NOTE(review): 20 cells of 500 m span 10 km per side, not 1 km -- confirm
    # that 20 is the intended aggregation factor for "per km²" output.
    group_size = 20
    rows, cols = popArray.shape

    # Number of complete groups that fit along each axis.
    row_blocks = rows // group_size
    col_blocks = cols // group_size

    # Drop any partial trailing group; reshape would otherwise raise
    # ValueError for shapes that are not a multiple of group_size.
    trimmed = popArray[:row_blocks * group_size, :col_blocks * group_size]

    # Aggregate: axes 1 and 3 index the cells inside each group.
    kmArray = trimmed.reshape(row_blocks, group_size, col_blocks, group_size)

    # Sum over each group
    kmDensity = kmArray.sum(axis=(1, 3))

    # Hand back one aggregated row (a 1D array) at a time.
    yield from kmDensity


for row in densityKM_gen(highResPop):
    print(row)

[2000. 2000. 2000. 2000.]
[2000. 2000. 2000. 2000.]
[2000. 2000. 2000. 2000.]
[2000. 2000. 2000. 2000.]

In [9]:

Copied!





# # # Exercise solution, version 2 (even more memory efficient) # # #
import numpy as np

# Starting dataset: 80x80 grid of people per 500 m x 500 m cell.
highResPop = np.ones((80, 80)) * 5

def densityKM_gen2(popArray):
    """
    Generator that yields rows of aggregated population values, one group of
    cells at a time.

    The input is walked in square groups of group_size x group_size cells.
    Each group is sliced out and summed on its own, and the sums for one
    band of groups are collected into a single row before being yielded.
    Rows and columns beyond the last complete group are not visited.

    Input:  2D array (500x500m cells in the workshop dataset)
    Output: Each yield is a 1D NumPy array holding the group sums for one
    band of rows.
    """
    import numpy as np

    group_size = 20
    n_rows, n_cols = popArray.shape

    # Exclusive upper bounds covering only the complete groups on each axis.
    row_limit = (n_rows // group_size) * group_size
    col_limit = (n_cols // group_size) * group_size

    for top in range(0, row_limit, group_size):
        bottom = top + group_size
        # Sum each group in this band independently, left to right.
        band_sums = [
            popArray[top:bottom, left:left + group_size].sum()
            for left in range(0, col_limit, group_size)
        ]
        yield np.array(band_sums)


for row in densityKM_gen2(highResPop):
    print(row)
# # # Exercise solution, version 2 (even more memory efficient) # # #
import numpy as np

# Starting dataset: 80x80 grid of people per 500 m x 500 m cell.
highResPop = np.ones((80, 80)) * 5

def densityKM_gen2(popArray):
    """
    Generator that yields rows of aggregated population values, one group of
    cells at a time.

    The input is walked in square groups of group_size x group_size cells.
    Each group is sliced out and summed on its own, and the sums for one
    band of groups are collected into a single row before being yielded.
    Rows and columns beyond the last complete group are not visited.

    Input:  2D array (500x500m cells in the workshop dataset)
    Output: Each yield is a 1D NumPy array holding the group sums for one
    band of rows.
    """
    import numpy as np

    group_size = 20
    n_rows, n_cols = popArray.shape

    # Exclusive upper bounds covering only the complete groups on each axis.
    row_limit = (n_rows // group_size) * group_size
    col_limit = (n_cols // group_size) * group_size

    for top in range(0, row_limit, group_size):
        bottom = top + group_size
        # Sum each group in this band independently, left to right.
        band_sums = [
            popArray[top:bottom, left:left + group_size].sum()
            for left in range(0, col_limit, group_size)
        ]
        yield np.array(band_sums)


for row in densityKM_gen2(highResPop):
    print(row)

[2000. 2000. 2000. 2000.]
[2000. 2000. 2000. 2000.]
[2000. 2000. 2000. 2000.]
[2000. 2000. 2000. 2000.]

Exercise Answers¶

1.1) Use unpacking for pretty printing¶

1.2) Use try/except¶

1.3) Use standard library data classes¶

1.4) Use the built-in min and max functions¶

1.5) Open a file with a context manager¶

2.1) Tuple-based storage¶

2.2) Set-based look-up¶

2.3) Generator expression¶

2.4) Generator¶

2.5) Compare differences in speed using timeit¶

2.5.1) %timeit line magic¶

2.5.2) %%timeit cell magic¶

2.6) Check for speed bottlenecks in detail with cProfile¶

2.7) Stretch goal: Raster generator¶

2.5) Compare differences in speed using `timeit`¶

2.5.1) `%timeit` line magic¶

2.5.2) `%%timeit` cell magic¶

2.6) Check for speed bottlenecks in detail with `cProfile`¶