Collections
Tuple
Heterogeneous immutable sequence. It can be nested.
t = ("Norway", 4.953, 3)
t[2]
len(t)
t + (33816.0, 265e9) # return a new tuple object which contains the concatenated tuple
t * 3 # repeats 3 times
nested_t = (("Norway", 1), (2, 3), ("abc", "d"))
nested_t[2][1]
t = (391) # this gives int
t = (391,) # this gives single element tuple
t = () # this gives empty tuple
p = 1, 1, 1, 4, 6, 19 # Delimiting parentheses are optional for one or more elements
#create tuple from existing object
tuple([1, 2, 3, 4, 5])
tuple("abcdefgh") #('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h')
5 in (3, 5, 6, 7)
10 not in (3, 5, 6, 7)
Tuples are useful for multiple return values. Tuple unpacking allows us to destructure directly into named references.
a, b = b, a # is the idiomatic Python swap
def minmax(items):
    """Return the smallest and largest element of ``items`` as a 2-tuple.

    Illustrates multiple return values: the caller can unpack the result
    directly, e.g. ``lower, upper = minmax(values)``.

    ``items`` is scanned twice (once by ``min`` and once by ``max``), so it
    should be a re-iterable collection such as a list, not a one-shot
    iterator.

    Raises:
        ValueError: if ``items`` is empty (raised by ``min``).
    """
    return min(items), max(items)
lower, upper = minmax([83, 33, 84, 32, 85])
String (str)
The join() method is a faster way to concatenate strings than repeated use of +, because each + creates a temporary string object. Call join() on the separator string.
colors = ";".join(["#45ff23", "#2321fa", "#1298a3"])
colors.split(";")
colors.split() #the difference: with no argument, split() splits on runs of one or more whitespace characters
''.join(['high', 'way', 'ma'])
"abc" * 3 # means repeat the string for 3 times
ord('A') #get the Unicode code point of char A (identical to the ASCII code for ASCII characters)
chr(num) #get the char for a Unicode code point
ascii()
partition() method divides a string into three around a separator. If we don't use / want the separator, use underscore as a dummy for the variable to suppress unused variable warning.
depart, sep, arrival = "London:Endinburgh".partition(":")
depart, _, arrival = "London:Endinburgh".partition(":")
format(): integer field names are matched with positional arguments. Field names can be omitted if the arguments are used in sequence. Named fields are matched with keyword arguments.
"The age of {0} is {1}".format("Jim", 20)
"The age of {} is {}".format("Jim", 20)
"The age of {0} is {1}. {0}'s birthday is on {2}".format("Jim", 20, "October, 31")
"Current position {latitude} {longitude}".format(latitude="60N",
longitude="5E")
pos = (65.2, 23.1, 82.2)
"Galactic position x={pos[0]:.3f} y={pos[1]}, z={pos[2]}".format(pos=pos) #'Galactic position x=65.200 y=23.1, z=82.2'
import math
"Math constants: pi={m.pi: .6f}, e={m.e:.3f}".format(m=math) #'Math constants: pi= 3.141593, e=2.718'
Range
Arithmetic progression of integers.
range(10, 20, 2) #start stop step
#don't abuse range... (un-pythonic)
#prefer direct iteration over iterable objects such as lists
for v in s:
print(v)
#prefer enumerate() for counters, often combined with tuple unpacking
for i, v in enumerate(t):
print("i = {}, v = {}".format(i, v))
List
Lists allow heterogeneous types. A trailing comma after the last element is allowed, which makes multi-line literals easier to maintain.
a = [1, "apple", 8]
a = ['bear',
'giraffe',
'elephant',]
i = w.index('fox') #raise ValueError if not found
w.count('the')
'the' in w #'the' not in w
del u[3]
u.remove('jackdaws') #if item is not present, raise ValueError
a.insert(2, 'destroyed')
k += [18, 29, 47] #concatenate lists with + operator or extend()
k.extend([76, 129, 199])
k.pop() #remove last element
Indexing: negative integers index from the end; the last element is at index -1. Avoid seq[len(seq)-1]; write seq[-1] instead.
#important idiom for copying lists
full_slice = seq[:]
#more readable alternatives; note that all three methods (including the full slice) make shallow copies
s_copy = s.copy()
s_copy = list(s)
s = s[1:-1] #slicing works with negative indexes; this selects all elements except the first and the last
Repetition: using the * operator. Most often used for initialising a list of known size with a constant. Repetition is shallow!
s = [constant] * size; #multiple references to one instance of the constant in the produced list.
a = [2]
k = a * 5
a = [3]
k #[2, 2, 2, 2, 2]
s = [[-1, 1]] * 5
s[3].append(7)
s #[[-1, 1, 7], [-1, 1, 7], [-1, 1, 7], [-1, 1, 7], [-1, 1, 7]]
Sorting:
key argument to sort() method accepts a function for producing a sort key from an item.
#reverse and sort in place
list.reverse()
list.sort(reverse=True)
names = ['a', 'abc', 'bb']
names.sort(key=len)
#sorted() returns a new sorted list; reversed() returns a reverse iterator (neither modifies the original)
y = sorted(x)
q = reversed(p)
Dictionary(dict)
Keys must be hashable, which in practice means immutable: strings, numbers and tuples (of hashable elements) are fine, but lists are not.
#dict() constructor accepts iterable series of key-value 2-tuples
names_and_ages = [('Alice', 32), ('Bob', 48), ('Peter', 11)]
d = dict(names_and_ages)
#this creates a dict object which will not raise KeyError if the key doesn't exist.
#Instead it calls the factory function (here list) to create a default value for the missing key
colData = collections.defaultdict(list)
#keyword arguments - requires keys are valid Python identifiers
phonetic = dict(a='alfa', b='bravo', c='charlie', d='delta')
phonetic #{'a': 'alfa', 'b': 'bravo', 'c': 'charlie', 'd': 'delta'}
#Extend a dictionary with update(), update replaces values corresponding to duplicated keys
stocks = {'GOOG':891, 'AAPL': 416}
stocks.update({'GOOG':894, 'YAHOO': 25})
stocks #{'GOOG': 894, 'AAPL': 416, 'YAHOO': 25}
#iterating the dict
for key in colors:
print("{key} => {value}".format(key=key, value=colors[key]))
for key, value in colors.items(): #automatic unpacking of item tuple
print("{key} => {value}".format(key=key, value=value))
for value in colors.values():
print(value)
#the in and not in operators work on the keys only
'mkd' in symbols
del d[key] #Removal
#getOrDefault
sumIndexMap.get(accSum-k, i)
#built-in pretty printing
from pprint import pprint as pp
pp(stocks)
Set
unordered collection of unique, immutable objects.
p = {1, 2, 3, 4}
d = {} # this is empty dict not empty set
d #{}
empty_set = set()
empty_set #set()
s.add(1)
s.update([1,2,3])
s.remove(97) #will raise KeyError if the element is not present
s.discard(98) #remove if the item is present
Set algebra
a.union(b) #commutative
a.intersection(b) #commutative
a.difference(b) #find all elements in set a that are not in set b; not commutative
a.symmetric_difference(b) #a.union(b) - a.intersection(b), commutative
a.issubset(b)
a.issuperset(b)
a.isdisjoint(b)
Comprehensions
declarative, functional, readable, expressive and effective.
[len(word) for word in words] #list comprehension
{len(word) for word in words} #set comprehension
capital_to_country = {capital: country for country, capital in country_to_capital.items()} #dict comprehension
words = ["hi", "hello", "fox", "hotel"]
{x[0]: x for x in words} #{'h': 'hotel', 'f': 'fox'} later keys overwrite earlier keys
#predicates
[x for x in range(101) if is_prime(x)]
Iterator
When an iterator reaches the end, calling next() on it raises a StopIteration exception.
iterator = iter(iterable)
try:
return next(iterator)
except StopIteration:
raise ValueError("iterable is empty")
Generator
A generator in Python is an iterator; calling a function that contains yield produces one.
def distinct(iterable):
    """Lazily yield each item of ``iterable`` the first time it occurs.

    Later duplicates are skipped, so the output keeps first-seen order.
    Items are tracked in a set and therefore must be hashable.

    Args:
        iterable: any iterable of hashable items.

    Yields:
        Each distinct item, in order of first appearance.
    """
    seen = set()  # Python has no ``new`` keyword; call the constructor directly
    for item in iterable:
        if item in seen:
            continue
        yield item  # flow control returns to the caller here
        seen.add(item)  # runs only once the caller asks for the next item
def run_distinct():
    """Print every value that ``distinct`` yields for a sample list.

    Each pass through the loop resumes the generator, which runs until its
    next ``yield`` and hands one item back here.
    """
    items = [5, 7, 7, 6, 5, 5]
    for item in distinct(items):  # flow control is directed to generator code
        print(item)
#generators can be chained into a pipeline, for example:
for item in take(3, distinct(items)): #take(k, iterable) is a generator that yields the first k elements of its input
#generator comprehension
million_squares = (x*x for x in range(1, 1000001)) #() means this is a generator obj
list(million_squares) #create a list from generator iterator
list(million_squares) #calling it again gives an empty list, because a generator is a single-use object.
#using a generator expression as the argument to sum(). This uses almost no memory. The second pair of parentheses is optional.
sum(x*x for x in range(1, 10000001)) #equivalent to sum((x*x for x in range(1, 10000001)))
sum(x*x for x in range(1, 10000001) if is_prime(x)) #with predicate