Collections

Tuple

Heterogeneous immutable sequence. It can be nested.

t = ("Norway", 4.953, 3)
t[2]
len(t)
t + (33816.0, 265e9) # return a new tuple object which contains the concatenated tuple
t * 3 # repeats 3 times
nested_t = (("Norway", 1), (2, 3), ("abc", "d"))
nested_t[2][1]
t = (391) # this gives int
t = (391,) # this gives single element tuple
t = () # this gives empty tuple
p = 1, 1, 1, 4, 6, 19  # Delimiting parentheses are optional for one or more elements
#create tuple from existing object
tuple([1, 2, 3, 4, 5])
tuple("abcdefgh")  #('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h')
5 in (3, 5, 6, 7)
10 not in (3, 5, 6, 7)

Tuples are useful for multiple return values. Tuple unpacking allows us to destructure directly into named references.

a, b = b, a # is the idiomatic Python swap
def minmax(items):
    """Return the smallest and largest element of *items* as a 2-tuple.

    The pair is returned as ``(minimum, maximum)`` so callers can unpack it
    directly, e.g. ``lower, upper = minmax(values)``.
    """
    smallest = min(items)
    largest = max(items)
    return smallest, largest

lower, upper = minmax([83, 33, 84, 32, 85])

String (str)

The join() method is a faster way to concatenate many strings than repeated use of +, because each + creates a temporary string object. Call join() on the separator string.

colors = ";".join(["#45ff23", "#2321fa", "#1298a3"]) #'#45ff23;#2321fa;#1298a3'
colors.split(";") #['#45ff23', '#2321fa', '#1298a3']
colors.split() #with no separator, split() splits on runs of whitespace (one or more) and drops empty strings
''.join(['high', 'way', 'ma']) #'highwayma' - join on the empty string to concatenate without a separator

"abc" * 3 # means repeat the string for 3 times

ord('A') #get the Unicode code point of char A (equal to its ASCII code for ASCII chars): 65
chr(num) #get the char for a Unicode code point (inverse of ord)
ascii() #NOTE: ascii() needs one argument; ascii(obj) returns repr(obj) with non-ASCII characters escaped

The partition() method divides a string into three parts around a separator: the text before it, the separator itself, and the text after it. If we don't need the separator, use an underscore as a dummy variable name to suppress unused-variable warnings.

depart, sep, arrival = "London:Endinburgh".partition(":")
depart, _, arrival = "London:Endinburgh".partition(":")

format(): integer field names are matched with positional arguments. Field names can be omitted if the fields are used in sequence. Named fields are matched with keyword arguments.

"The age of {0} is {1}".format("Jim", 20)
"The age of {} is {}".format("Jim", 20)
"The age of {0} is {1}. {0}'s birthday is on {2}".format("Jim", 20, "October, 31")
"Current position {latitude} {longitude}".format(latitude="60N",
                                                 longitude="5E")
pos = (65.2, 23.1, 82.2)
"Galactic position x={pos[0]:.3f} y={pos[1]}, z={pos[2]}".format(pos=pos) #'Galactic position x=65.200 y=23.1, z=82.2'
import math
"Math constants: pi={m.pi: .6f}, e={m.e:.3f}".format(m=math) #'Math constants: pi= 3.141593, e=2.718'

Range

Arithmetic progression of integers.

range(10, 20, 2) #start stop step

#don't abuse range... (un-pythonic)
#prefer direct iteration over iterable objects such as lists
for v in s:
    print(v)

#prefer enumerate() for counters, often combined with tuple unpacking
for i, v in enumerate(t):
    print("i = {}, v = {}".format(i, v))

List

Lists allow heterogeneous element types. A trailing comma after the last element is allowed, which helps maintainability.

a = [1, "apple", 8]
a = ['bear',
     'giraffe',
     'elephant',]

i = w.index('fox') #raise ValueError if not found
w.count('the')
'the' in w #'the' not in w
del u[3]
u.remove('jackdaws') #if item is not present, raise ValueError
a.insert(2, 'destroyed')
k += [18, 29, 47] #concatenate lists with + operator or extend()
k.extend([76, 129, 199])
k.pop() #remove last element

Indexing: negative integers index from the end, so the last element is at index -1. Prefer seq[-1] and avoid ~~seq[len(seq)-1]~~.

#important idiom for copying lists
full_slice = seq[:]
#more readable alternatives; note that all three of these methods make shallow copies
s_copy = s.copy()
s_copy = list(s)

s = s[1:-1] #slicing works with negative indexes, this means all elements exclude the first and last element

Repetition: using the * operator. Most often used for initialising a list of known size with a constant. Repetition is shallow!

s = [constant] * size; #multiple references to one instance of the constant in the produced list.

a = [2]
k = a * 5
a = [3]
k #[2, 2, 2, 2, 2]

s = [[-1, 1]] * 5
s[3].append(7)
s #[[-1, 1, 7], [-1, 1, 7], [-1, 1, 7], [-1, 1, 7], [-1, 1, 7]]

Sorting:

key argument to sort() method accepts a function for producing a sort key from an item.

#reverse and sort in place (both methods return None); `list` below stands for any list object
list.reverse()
list.sort(reverse=True)

names = ['a', 'abc', 'bb']
names.sort(key=len)

#sorted() returns a new sorted list; reversed() returns a reverse iterator. Neither modifies the original
y = sorted(x)
q = reversed(p)

Dictionary(dict)

Keys must be hashable, which in practice means immutable: strings, numbers and tuples (of hashable items) are fine, but lists are not.

#dict() constructor accepts iterable series of key-value 2-tuples
names_and_ages = [('Alice', 32), ('Bob', 48), ('Peter', 11)]
d = dict(names_and_ages)

#this creates a dict object which will not raise KeyError if the key doesn't exist (requires `import collections`).
#Instead it calls the factory function (here list) to create a new default value for the missing key
colData = collections.defaultdict(list)

#keyword arguments - requires keys are valid Python identifiers
phonetic = dict(a='alfa', b='bravo', c='charlie', d='delta')
phonetic #{'a': 'alfa', 'b': 'bravo', 'c': 'charlie', 'd': 'delta'}

#Extend a dictionary with update(), update replaces values corresponding to duplicated keys
stocks = {'GOOG':891, 'AAPL': 416}
stocks.update({'GOOG':894, 'YAHOO': 25})
stocks #{'GOOG': 894, 'AAPL': 416, 'YAHOO': 25}

#iterating the dict
for key in colors:
    print("{key} => {value}".format(key=key, value=colors[key]))
for key, value in colors.items(): #automatic unpacking of item tuple
    print("{key} => {value}".format(key=key, value=value))
for value in colors.values():
    print(value)

#the in and not in operators work on the keys only
'mkd' in symbols

del d[key] #Removal

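#get(key, default) returns the default instead of raising KeyError when the key is missing (like Java's getOrDefault)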
sumIndexMap.get(accSum-k, i)

#built-in pretty printing
from pprint import pprint as pp
pp(stocks)

Set

An unordered collection of unique, immutable (hashable) elements. The set itself is mutable.

p = {1, 2, 3, 4}

d = {} # this is empty dict not empty set
d #{}
empty_set = set()
empty_set #set()

s.add(1)
s.update([1,2,3])
s.remove(97) #will raise KeyError if the element is not present
s.discard(98) #remove if the item is present

Set algebra

a.union(b) #commutative
a.intersection(b) #commutative
a.difference(b) #find all element in set a but not in set b, not commutative
a.symmetric_difference(b) #a.union(b) - a.intersection(b), commutative
a.issubset(b)
a.issuperset(b)
a.isdisjoint(b)

Comprehensions

declarative, functional, readable, expressive and effective.

[len(word) for word in words] #list comprehension
{len(word) for word in words} #set comprehension 

capital_to_country = {capital: country for country, capital in country_to_capital.items()} #dict comprehension

words = ["hi", "hello", "fox", "hotel"]
{x[0]: x for x in words} #{'h': 'hotel', 'f': 'fox'} later keys overwrite earlier keys

#predicates
[x for x in range(101) if is_prime(x)]

Iterator

When an iterator is exhausted, calling next() on it raises a StopIteration exception.

def first(iterable):
    """Return the first item of *iterable*.

    Raises:
        ValueError: if *iterable* yields no items.
    """
    # `return` is only legal inside a function, so the snippet is wrapped in one.
    iterator = iter(iterable)
    try:
        return next(iterator)
    except StopIteration:
        # Translate the low-level iterator protocol signal into an error that
        # describes the caller's actual mistake.
        raise ValueError("iterable is empty")

Generator

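A generator in Python is an iterator: a function containing yield returns a generator object whose values are computed lazily, one per next() call.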

def distinct(iterable):
    """Yield each item of *iterable* the first time it is encountered.

    Order of first occurrence is preserved and later duplicates are skipped.
    Items must be hashable because they are tracked in a set. Nothing is
    computed until the caller starts iterating.
    """
    seen = set()  # Python has no `new` keyword; call the constructor directly
    for item in iterable:
        if item in seen:
            continue
        yield item #flow control returns to the caller here
        # Runs only when the caller asks for the next item.
        seen.add(item)

def run_distinct():
    """Print every distinct value of a sample list, in first-seen order."""
    sample = [5, 7, 7, 6, 5, 5]
    # Each loop step hands control to the generator until its next yield.
    for value in distinct(sample):
        print(value)

#pipeline of generators, for example
#take(k, iterable) is assumed to be a generator yielding the first k items (itertools.islice(iterable, k) does the same)
for item in take(3, distinct(items)):
    print(item) #a for statement needs a body; items are pulled lazily through both generators

#generator expression (also called a generator comprehension)
million_squares = (x*x for x in range(1, 1000001)) #parentheses instead of brackets create a lazy generator object
list(million_squares) #consumes the generator and builds a list of one million squares
list(million_squares) #a second call returns [] because a generator is a single-use object

#passing a generator expression to sum(). This uses almost no memory. The second pair of parentheses is optional.
sum(x*x for x in range(1, 10000001)) #equivalent to sum((x*x for x in range(1, 10000001)))
sum(x*x for x in range(1, 10000001) if is_prime(x)) #with predicate

Collections

Collections

Tuple

String (str)

Range

List

Dictionary(dict)

Set

Comprehensions

Iterator

Generator

results matching ""

No results matching ""