Exercise Answers¶
The code cells below are example answers to the workshop exercises. They are useful if you get stuck and need a hint, or if you want to compare them with your own attempts.
1.1) Use unpacking for pretty printing¶
In [ ]:
Copied!
# County names to display, one per line.
counties = ["Anoka", "Dakota", "Carver", "Hennepin", "Ramsey", "Scott", "Washington"]
# The * operator unpacks the list so that each name is passed to print() as a
# separate positional argument; sep='\n' then places every name on its own line.
print(*counties, sep='\n')
# County names to display, one per line.
counties = ["Anoka", "Dakota", "Carver", "Hennepin", "Ramsey", "Scott", "Washington"]
# The * operator unpacks the list so that each name is passed to print() as a
# separate positional argument; sep='\n' then places every name on its own line.
print(*counties, sep='\n')
Anoka Dakota Carver Hennepin Ramsey Scott Washington
1.2) Use try/except¶
In [ ]:
Copied!
from math import inf
from typing import NamedTuple


class Record(NamedTuple):
    # Two integer counts describing one record.
    total_population: int
    population_in_poverty: int


record1 = Record(5000, 200)
record2 = Record(200, 0)

# A NamedTuple iterates over its values in field order, so zipping the two
# records pairs up the matching fields.
for numerator, denominator in zip(record1, record2):
    try:
        ratio = numerator / denominator
    except ZeroDivisionError:
        # A zero denominator is reported as infinity instead of stopping the loop.
        ratio = inf
    print(ratio)
from math import inf
from typing import NamedTuple


class Record(NamedTuple):
    # Two integer counts describing one record.
    total_population: int
    population_in_poverty: int


record1 = Record(5000, 200)
record2 = Record(200, 0)

# A NamedTuple iterates over its values in field order, so zipping the two
# records pairs up the matching fields.
for numerator, denominator in zip(record1, record2):
    try:
        ratio = numerator / denominator
    except ZeroDivisionError:
        # A zero denominator is reported as infinity instead of stopping the loop.
        ratio = inf
    print(ratio)
25.0 inf
1.3) Use standard library data classes¶
In [ ]:
Copied!
from dataclasses import dataclass


@dataclass
class Record:
    # A dataclass is mutable unless it is declared with frozen=True.
    total_population: int
    population_in_poverty: int


record = Record(total_population=5000, population_in_poverty=200)
# Fields can be reassigned in place after construction.
record.total_population = 6000
# The generated __repr__ shows every field by name.
print(record)
from dataclasses import dataclass


@dataclass
class Record:
    # A dataclass is mutable unless it is declared with frozen=True.
    total_population: int
    population_in_poverty: int


record = Record(total_population=5000, population_in_poverty=200)
# Fields can be reassigned in place after construction.
record.total_population = 6000
# The generated __repr__ shows every field by name.
print(record)
Record(total_population=6000, population_in_poverty=200)
1.4) Use the built-in min and max functions¶
In [ ]:
Copied!
from random import randint

# Twenty random integers, each drawn from -1000 to 1000 inclusive.
nums = [randint(-1000, 1000) for _ in range(20)]

# The built-ins scan the list directly; no sorting or manual loop is needed.
largest = max(nums)
smallest = min(nums)
print(largest, smallest)
from random import randint

# Twenty random integers, each drawn from -1000 to 1000 inclusive.
nums = [randint(-1000, 1000) for _ in range(20)]

# The built-ins scan the list directly; no sorting or manual loop is needed.
largest = max(nums)
smallest = min(nums)
print(largest, smallest)
891 -899
1.5) Open a file with a context manager¶
In [ ]:
Copied!
# The context manager closes the file when the block exits, even if the write
# raises. Passing an explicit encoding keeps the bytes written independent of
# the platform's locale default.
with open("exercise.txt", "w", encoding="utf-8") as f:
    f.write("This is example text for an exercise.")
# The context manager closes the file when the block exits, even if the write
# raises. Passing an explicit encoding keeps the bytes written independent of
# the platform's locale default.
with open("exercise.txt", "w", encoding="utf-8") as f:
    f.write("This is example text for an exercise.")
2.1) Tuple-based storage¶
In [1]:
Copied!
import sys


def tupleFromRange(r1, r2):
    """Return a tuple of consecutive integers from r1 through r2, inclusive."""
    # range() excludes its stop value, so extend it by one to keep r2.
    inclusive_stop = r2 + 1
    return tuple(range(r1, inclusive_stop))


start = 1900
end = 2030
studyYears = tupleFromRange(start, end)
print(studyYears)
# sys.getsizeof reports the size of the tuple object itself, in bytes.
print("Bytes used: ", sys.getsizeof(studyYears))
import sys


def tupleFromRange(r1, r2):
    """Return a tuple of consecutive integers from r1 through r2, inclusive."""
    # range() excludes its stop value, so extend it by one to keep r2.
    inclusive_stop = r2 + 1
    return tuple(range(r1, inclusive_stop))


start = 1900
end = 2030
studyYears = tupleFromRange(start, end)
print(studyYears)
# sys.getsizeof reports the size of the tuple object itself, in bytes.
print("Bytes used: ", sys.getsizeof(studyYears))
(1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030) Bytes used: 1088
2.2) Set-based look-up¶
In [2]:
Copied!
placeNames_list = ["Kinshasa", "Duluth", "Uruguay", "Doherty Residence", "Dinkytown", "Khazad-dûm"]
# Build the set once so that later membership tests are hash look-ups.
placeNames_set = set(placeNames_list)

# Set look-up: O(1) on average, versus an O(n) scan of the list.
is_missing = "Dinkytown" not in placeNames_set
if is_missing:
    print("Missing.")
placeNames_list = ["Kinshasa", "Duluth", "Uruguay", "Doherty Residence", "Dinkytown", "Khazad-dûm"]
# Build the set once so that later membership tests are hash look-ups.
placeNames_set = set(placeNames_list)

# Set look-up: O(1) on average, versus an O(n) scan of the list.
is_missing = "Dinkytown" not in placeNames_set
if is_missing:
    print("Missing.")
2.3) Generator expression¶
In [3]:
Copied!
import random
import string

# Input dataset: ten random strings, each made of 8 ASCII letters.
randomStrings = [
    ''.join(random.choices(string.ascii_letters, k=8))
    for _ in range(10)
]
print(randomStrings)

# Lowercase the strings lazily: nothing is converted until the loop below
# pulls each value out of the generator.
lowerCase_gen = (text.lower() for text in randomStrings)
for lowered in lowerCase_gen:
    print(lowered)
import random
import string

# Input dataset: ten random strings, each made of 8 ASCII letters.
randomStrings = [
    ''.join(random.choices(string.ascii_letters, k=8))
    for _ in range(10)
]
print(randomStrings)

# Lowercase the strings lazily: nothing is converted until the loop below
# pulls each value out of the generator.
lowerCase_gen = (text.lower() for text in randomStrings)
for lowered in lowerCase_gen:
    print(lowered)
['qwUYHnBg', 'yypGsKIi', 'bRVmubKp', 'RbEdRWKF', 'zCRwzvBG', 'swaWiWNs', 'gXndKiBa', 'acRnakiC', 'kuElpvqh', 'fvyfTINZ'] qwuyhnbg yypgskii brvmubkp rbedrwkf zcrwzvbg swawiwns gxndkiba acrnakic kuelpvqh fvyftinz
2.4) Generator¶
In [4]:
Copied!
# The list that each dataset will be compared to.
primary = [4, 7, 140, 55, 7, 91, 6]

# Input datasets
inputs = (
    [0, 3, 40, 55, 6, 98, 4],
    [5, 4, 3, 45, 1, 67, 2],
    [7, 150, 0.5, 1],
)


def matchingStructure_gen(inputsList, primList):
    """
    This generator compares the length of each input collection to the primary
    list. An input that matches in length is subtracted element-wise from the
    primary list, and the list of differences is yielded.

    Each yielded list holds ``primList[i] - item[i]`` for every position i.
    Inputs whose length differs from ``primList`` are skipped.
    """
    for item in inputsList:
        if len(item) == len(primList):
            # zip pairs (input value, primary value); the result is primary - input.
            difference = [b - a for a, b in zip(item, primList)]
            yield difference


for item in matchingStructure_gen(inputs, primary):
    print(item)
# The list that each dataset will be compared to.
primary = [4, 7, 140, 55, 7, 91, 6]

# Input datasets
inputs = (
    [0, 3, 40, 55, 6, 98, 4],
    [5, 4, 3, 45, 1, 67, 2],
    [7, 150, 0.5, 1],
)


def matchingStructure_gen(inputsList, primList):
    """
    This generator compares the length of each input collection to the primary
    list. An input that matches in length is subtracted element-wise from the
    primary list, and the list of differences is yielded.

    Each yielded list holds ``primList[i] - item[i]`` for every position i.
    Inputs whose length differs from ``primList`` are skipped.
    """
    for item in inputsList:
        if len(item) == len(primList):
            # zip pairs (input value, primary value); the result is primary - input.
            difference = [b - a for a, b in zip(item, primList)]
            yield difference


for item in matchingStructure_gen(inputs, primary):
    print(item)
[4, 4, 100, 0, 1, -7, 2] [-1, 3, 137, 10, 6, 24, 4]
2.5) Compare differences in speed using timeit¶
2.5.1) %timeit line magic¶
In [5]:
Copied!
# Time building the full list: the comprehension runs to completion on every loop.
%timeit [i for i in range(50) if i % 2 == 0]
# Time the equivalent generator expression. The generator is never iterated
# here, so this measures only the cost of creating the generator object, not
# of producing its values.
%timeit (i for i in range(50) if i % 2 == 0)
# Time building the full list: the comprehension runs to completion on every loop.
%timeit [i for i in range(50) if i % 2 == 0]
# Time the equivalent generator expression. The generator is never iterated
# here, so this measures only the cost of creating the generator object, not
# of producing its values.
%timeit (i for i in range(50) if i % 2 == 0)
4.2 µs ± 833 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each) 505 ns ± 19.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
2.5.2) %%timeit cell magic¶
In [6]:
Copied!
%%timeit -n 1 -r 1
employeeDatabase = [
    {'lastName': 'Knope', 'rate': 72000, 'pay_class': 'annual'},
    {'lastName': 'Gergich', 'rate': 17, 'pay_class': 'hourly'},
    {'lastName': 'Ludgate', 'rate': 60000, 'pay_class': 'annual'},
    {'lastName': 'Swanson', 'rate': 'redacted', 'pay_class': 'redacted'},
    {'lastName': 'Haverford', 'rate': 52000, 'pay_class': 'annual'},
]


def hourly_rate(payments):
    """
    Return a list with the hourly rate of every worker paid annually.

    Only records whose 'pay_class' is 'annual' are converted; records with any
    other pay class, or with no 'pay_class' key at all, are skipped.
    """
    # presumably 2080 = 40 hours/week x 52 weeks -- confirm
    return [
        worker['rate'] / 2080
        for worker in payments
        if worker.get('pay_class') == 'annual'
    ]


# Sum hourly rates for those receiving an annual salary.
salariesPerHour = sum(hourly_rate(employeeDatabase))
print(f"Total dispersments per hour for salaried employees: ${salariesPerHour:.2f}")
%%timeit -n 1 -r 1
employeeDatabase = [
    {'lastName': 'Knope', 'rate': 72000, 'pay_class': 'annual'},
    {'lastName': 'Gergich', 'rate': 17, 'pay_class': 'hourly'},
    {'lastName': 'Ludgate', 'rate': 60000, 'pay_class': 'annual'},
    {'lastName': 'Swanson', 'rate': 'redacted', 'pay_class': 'redacted'},
    {'lastName': 'Haverford', 'rate': 52000, 'pay_class': 'annual'},
]


def hourly_rate(payments):
    """
    Return a list with the hourly rate of every worker paid annually.

    Only records whose 'pay_class' is 'annual' are converted; records with any
    other pay class, or with no 'pay_class' key at all, are skipped.
    """
    # presumably 2080 = 40 hours/week x 52 weeks -- confirm
    return [
        worker['rate'] / 2080
        for worker in payments
        if worker.get('pay_class') == 'annual'
    ]


# Sum hourly rates for those receiving an annual salary.
salariesPerHour = sum(hourly_rate(employeeDatabase))
print(f"Total dispersments per hour for salaried employees: ${salariesPerHour:.2f}")
Total dispersments per hour for salaried employees: $88.46 79.4 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
2.6) Check for speed bottlenecks in detail with cProfile¶
In [7]:
Copied!
import cProfile

# Everything executed inside the with-block is profiled.
with cProfile.Profile() as pr:
    # Build the same sequence of integers twice: once with a list
    # comprehension and once by feeding a generator expression to tuple().
    dataList = [x for x in range(1, 10_000_000)]
    dataTuple = tuple(x for x in range(1, 10_000_000))

    listFromList = []
    listFromTuple = []

    # Run the same loop body over the list...
    for item in dataList:
        new = item + 1
        listFromList.append(new)

    # ...and over the tuple.
    for item in dataTuple:
        new = item + 1
        listFromTuple.append(new)

    # Print the collected statistics sorted by cumulative time. This is called
    # while the profiler is still active, so print_stats and its helpers show
    # up in the report as well.
    pr.print_stats('cumtime')
import cProfile

# Everything executed inside the with-block is profiled.
with cProfile.Profile() as pr:
    # Build the same sequence of integers twice: once with a list
    # comprehension and once by feeding a generator expression to tuple().
    dataList = [x for x in range(1, 10_000_000)]
    dataTuple = tuple(x for x in range(1, 10_000_000))

    listFromList = []
    listFromTuple = []

    # Run the same loop body over the list...
    for item in dataList:
        new = item + 1
        listFromList.append(new)

    # ...and over the tuple.
    for item in dataTuple:
        new = item + 1
        listFromTuple.append(new)

    # Print the collected statistics sorted by cumulative time. This is called
    # while the profiler is still active, so print_stats and its helpers show
    # up in the report as well.
    pr.print_stats('cumtime')
30000008 function calls in 4.517 seconds Ordered by: cumulative time ncalls tottime percall cumtime percall filename:lineno(function) 10000000 1.981 0.000 1.981 0.000 <ipython-input-7-233c72c0aca7>:5(<genexpr>) 19999998 1.670 0.000 1.670 0.000 {method 'append' of 'list' objects} 1 0.866 0.866 0.866 0.866 <ipython-input-7-233c72c0aca7>:4(<listcomp>) 1 0.000 0.000 0.000 0.000 cProfile.py:41(print_stats) 1 0.000 0.000 0.000 0.000 pstats.py:108(__init__) 1 0.000 0.000 0.000 0.000 pstats.py:118(init) 1 0.000 0.000 0.000 0.000 pstats.py:137(load_stats) 1 0.000 0.000 0.000 0.000 cProfile.py:51(create_stats) 1 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance} 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 1 0.000 0.000 0.000 0.000 {built-in method builtins.len} 1 0.000 0.000 0.000 0.000 {built-in method builtins.hasattr}
2.7) Stretch goal: Raster generator¶
In [8]:
Copied!
# # # Exercise solution, version 1 # # #
import numpy as np

# Starting dataset: 80x80 grid of 500 m x 500 m cells, 5 people in each cell.
highResPop = np.ones((80, 80)) * 5


def densityKM_gen(popArray, group_size=20):
    """
    Generator that aggregates a fine-resolution population grid into coarser
    cells and yields the result one row at a time.

    Every ``group_size`` x ``group_size`` block of input cells is summed into
    a single output cell. The whole aggregated array is computed up front and
    then handed out row by row.

    NOTE(review): the exercise describes 500 m input cells and km² output
    cells, which would correspond to group_size=2. The default of 20 is kept
    because it reproduces the published 4x4 output -- confirm the intent.

    Input:
        popArray: 2D NumPy array of per-cell population counts. Both
            dimensions must be exact multiples of ``group_size``; otherwise
            the reshape raises ValueError.
        group_size: number of input cells along each side of one output cell.
    Output: Each yield output is a 1D array representing one row of
        aggregated values.
    """
    rows, cols = popArray.shape

    # Split each axis into (block index, position inside the block).
    blocks = popArray.reshape(
        rows // group_size, group_size,
        cols // group_size, group_size,
    )

    # Sum over the two "position inside the block" axes.
    kmDensity = blocks.sum(axis=(1, 3))

    # Iterating a 2D array produces its rows one at a time.
    yield from kmDensity


for row in densityKM_gen(highResPop):
    print(row)
# # # Exercise solution, version 1 # # #
import numpy as np

# Starting dataset: 80x80 grid of 500 m x 500 m cells, 5 people in each cell.
highResPop = np.ones((80, 80)) * 5


def densityKM_gen(popArray, group_size=20):
    """
    Generator that aggregates a fine-resolution population grid into coarser
    cells and yields the result one row at a time.

    Every ``group_size`` x ``group_size`` block of input cells is summed into
    a single output cell. The whole aggregated array is computed up front and
    then handed out row by row.

    NOTE(review): the exercise describes 500 m input cells and km² output
    cells, which would correspond to group_size=2. The default of 20 is kept
    because it reproduces the published 4x4 output -- confirm the intent.

    Input:
        popArray: 2D NumPy array of per-cell population counts. Both
            dimensions must be exact multiples of ``group_size``; otherwise
            the reshape raises ValueError.
        group_size: number of input cells along each side of one output cell.
    Output: Each yield output is a 1D array representing one row of
        aggregated values.
    """
    rows, cols = popArray.shape

    # Split each axis into (block index, position inside the block).
    blocks = popArray.reshape(
        rows // group_size, group_size,
        cols // group_size, group_size,
    )

    # Sum over the two "position inside the block" axes.
    kmDensity = blocks.sum(axis=(1, 3))

    # Iterating a 2D array produces its rows one at a time.
    yield from kmDensity


for row in densityKM_gen(highResPop):
    print(row)
[2000. 2000. 2000. 2000.] [2000. 2000. 2000. 2000.] [2000. 2000. 2000. 2000.] [2000. 2000. 2000. 2000.]
In [9]:
Copied!
# # # Exercise solution, version 2 (even more memory efficient) # # #
import numpy as np

# Starting dataset: 80x80 grid of 500 m x 500 m cells, 5 people in each cell.
highResPop = np.ones((80, 80)) * 5


def densityKM_gen2(popArray, group_size=20):
    """
    Generator that aggregates a fine-resolution population grid into coarser
    cells and yields the result one row at a time.

    This generator never builds the entire aggregated array in memory. It
    sums one ``group_size`` x ``group_size`` block of cells at a time and
    keeps only the output row currently being assembled.

    NOTE(review): the exercise describes 500 m input cells and km² output
    cells, which would correspond to group_size=2. The default of 20 is kept
    because it reproduces the published 4x4 output -- confirm the intent.

    Input:
        popArray: 2D NumPy array of per-cell population counts. Rows and
            columns left over after the last complete block are ignored.
        group_size: number of input cells along each side of one output cell.
    Output: Each yield is a 1D NumPy array representing one row of aggregated
        values, processed block by block.
    """
    rows, cols = popArray.shape
    # Floor division keeps complete blocks only.
    num_row_blocks = rows // group_size
    num_col_blocks = cols // group_size

    for row_start in range(0, num_row_blocks * group_size, group_size):
        row_stop = row_start + group_size
        # One block sum per output cell in this row.
        row_densities = [
            popArray[row_start:row_stop, col_start:col_start + group_size].sum()
            for col_start in range(0, num_col_blocks * group_size, group_size)
        ]
        yield np.array(row_densities)


for row in densityKM_gen2(highResPop):
    print(row)
# # # Exercise solution, version 2 (even more memory efficient) # # #
import numpy as np

# Starting dataset: 80x80 grid of 500 m x 500 m cells, 5 people in each cell.
highResPop = np.ones((80, 80)) * 5


def densityKM_gen2(popArray, group_size=20):
    """
    Generator that aggregates a fine-resolution population grid into coarser
    cells and yields the result one row at a time.

    This generator never builds the entire aggregated array in memory. It
    sums one ``group_size`` x ``group_size`` block of cells at a time and
    keeps only the output row currently being assembled.

    NOTE(review): the exercise describes 500 m input cells and km² output
    cells, which would correspond to group_size=2. The default of 20 is kept
    because it reproduces the published 4x4 output -- confirm the intent.

    Input:
        popArray: 2D NumPy array of per-cell population counts. Rows and
            columns left over after the last complete block are ignored.
        group_size: number of input cells along each side of one output cell.
    Output: Each yield is a 1D NumPy array representing one row of aggregated
        values, processed block by block.
    """
    rows, cols = popArray.shape
    # Floor division keeps complete blocks only.
    num_row_blocks = rows // group_size
    num_col_blocks = cols // group_size

    for row_start in range(0, num_row_blocks * group_size, group_size):
        row_stop = row_start + group_size
        # One block sum per output cell in this row.
        row_densities = [
            popArray[row_start:row_stop, col_start:col_start + group_size].sum()
            for col_start in range(0, num_col_blocks * group_size, group_size)
        ]
        yield np.array(row_densities)


for row in densityKM_gen2(highResPop):
    print(row)
[2000. 2000. 2000. 2000.] [2000. 2000. 2000. 2000.] [2000. 2000. 2000. 2000.] [2000. 2000. 2000. 2000.]