0%

Mosh的Python课程笔记(5)--Data Structures

Mosh的课程网址

List

1
2
3
4
5
6
7
8
letters = ["a", "b", "c"]
matrix = [[0, 1], [2, 3]]

zeros = [0] * 5 # [0,0,0,0,0]
combined = zeros + letters # [0, 0, 0, 0, 0, 'a', 'b', 'c']
numbers = list(range(20))
chars = list("Hello World") # ['H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd']
print(len(chars)) # 11

Demo 1

1
2
3
4
5
6
7
letters = ["a", "b", "c", "d"]
print(letters[0])
print(letters[-1])
letters[0] = "A"
print(letters)
print(letters[:3])
print(letters[::2])
1
2
3
4
5
a
d
['A', 'b', 'c', 'd']
['A', 'b', 'c']
['A', 'c']

Demo 2

1
2
3
4
numbers = list(range(10))
print(numbers)
print(numbers[::2])
print(numbers[::-1]) # 倒序输出

输出结果:

1
2
3
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 2, 4, 6, 8]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

Unpacking Lists

Demo 1

1
2
3
numbers = [1, 2, 3]
first, second, third = numbers # first=1, second=2, third=3
first, second, _ = numbers

Demo 2

1
2
3
4
numbers2 = [1, 2, 3, 4, 4, 4, 4, 4, 4]
first, second, *other = numbers2
print(first) # 1
print(other) # [3, 4, 4, 4, 4, 4, 4]

与此同理:

1
2
3
4
5
6
7
8
9
def multiply(*numbers):
    """Return the product of all positional arguments.

    ``*numbers`` collects every positional argument into a tuple, so the
    function accepts any number of values. With no arguments the result
    is 1, the starting value of the running product.
    """
    total = 1
    for number in numbers:
        total *= number
    return total


print(multiply(2, 3, 4, 5))
print(multiply(4, 5, 6, 2, 5))

Another demo:

1
2
3
4
numbers2 = [1, 2, 3, 4, 4, 4, 4, 4, 9]
first, *other, last = numbers2
print(first, last) # 1 9
print(other) # [2, 3, 4, 4, 4, 4, 4]

输出:

1
2
1 9
[2, 3, 4, 4, 4, 4, 4]

Looping over Lists

1
2
3
letters = ["a", "b", "c"]
for letter in enumerate(letters):
    # enumerate() yields one (index, item) tuple per element
    print(letter)

输出结果:

1
2
3
(0, 'a')
(1, 'b')
(2, 'c')

unpacking tuple:

1
2
3
letters = ["a", "b", "c"]
for index, letter in enumerate(letters):
    # each (index, item) tuple is unpacked straight into two loop variables
    print(index, letter)

输出结果:

1
2
3
0 a
1 b
2 c

Adding/Removing Items

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
letters = ["a", "b", "c", "d", "e"]

# Add
letters.append("f") # 在末尾加入
letters.insert(0, "-") # 在首部加入
print(letters)

# Remove
letters.pop() # 删除最后一个元素
print(letters)

letters.remove("b") # 删除特定元素
print(letters)

del letters[0:3] # 删除多个元素
print(letters)

letters.clear() # 清除所有元素
print(letters)

输出结果:

1
2
3
4
5
['-', 'a', 'b', 'c', 'd', 'e', 'f']
['-', 'a', 'b', 'c', 'd', 'e']
['-', 'a', 'c', 'd', 'e']
['d', 'e']
[]

Finding Items

1
2
3
4
letters = ["a", "b", "c"]
print(letters.count("d"))  # 0
if "c" in letters:
    # index() raises ValueError for a missing item, so test membership first
    print(letters.index("c"))  # 2

Sorting Lists

Demo 1

1
2
3
numbers = [3, 51, 2, 8, 6]
numbers.sort(reverse=True) # 倒序
print(numbers)

输出结果:

1
[51, 8, 6, 3, 2]

Demo 2

built-in function sorted(): 返回一个新的 list,不改变原 list

1
2
3
numbers = [3, 51, 2, 8, 6]
print(sorted(numbers, reverse=True))
print(numbers)

输出结果:

1
2
[51, 8, 6, 3, 2]
[3, 51, 2, 8, 6]

Demo 3

1
2
3
4
5
6
7
8
9
10
11
12
13
items = [
("product1", 10),
("product2", 9),
("product3", 12),
]


def sort_item(item):
    """Sort key: return the second field of *item* (the price in a (name, price) tuple)."""
    return item[1]


items.sort(key=sort_item)
print(items)

输出结果:

1
[('product2', 9), ('product1', 10), ('product3', 12)]

Lambdas

改造上述的例子(Demo 3):

1
2
3
4
5
6
7
8
9
items = [
("product1", 10),
("product2", 9),
("product3", 12),
]


items.sort(key=lambda item: item[1])
print(items)

语法:

1
lambda parameters:expression

Map Function

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
items = [
("product1", 10),
("product2", 9),
("product3", 12),
]


# The manual loop that map() replaces:
# prices = []
# for item in items:
#     prices.append(item[1])

# print(prices)

# A better way: map() applies the lambda to every item; list() materializes the result
prices = list(map(lambda item: item[1], items))
print(prices)

输出:

1
[10, 9, 12]

Filter Function

1
2
3
4
5
6
7
8
items = [
("product1", 10),
("product2", 9),
("product3", 12),
]

filtered = list(filter(lambda item: item[1] >= 10, items))
print(filtered)

输出结果:

1
[('product1', 10), ('product3', 12)]

List Comprehension

1
[expression for item in items]

改造上面的 map function 和 filter function:

1
2
3
prices = [item[1] for item in items]

filtered = [item for item in items if item[1] >= 10]

Zip Function

1
2
3
4
list1 = [1, 2, 3]
list2 = [10, 20, 30]

print(list(zip("abc", list1, list2)))

输出结果:

1
[('a', 1, 10), ('b', 2, 20), ('c', 3, 30)]

Stacks

LIFO: Last In - First Out

1
2
3
4
5
6
browsing_session = []
browsing_session.append(1)  # push
browsing_session.append(2)

# An empty list is falsy, so this only pops when the stack has items
# (pop() on an empty list raises IndexError).
if browsing_session:
    print(browsing_session.pop())  # pop: removes and returns the last item pushed

Queues

FIFO: First In - First Out

1
2
3
4
5
6
7
8
9
10
11
from collections import deque

queue = deque([])
queue.append(1)  # enqueue at the right end
queue.append(2)
queue.append(3)
queue.popleft()  # dequeue from the left end (first in, first out)
print(queue)

if not queue:  # an empty deque is falsy
    print("Empty")

Tuples

1
2
3
4
5
# 都是tuple:
point = 1, 2
# point = 1,
# point = ()
print(type(point))

Demo

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
point = (1, 2) + (3, 4)
print(point) # (1, 2, 3, 4)

point = (1, 2) * 3
print(point) # (1, 2, 1, 2, 1, 2)

point = tuple([1, 2])
print(point) # (1, 2)


point = tuple("hello")
print(point) # ('h', 'e', 'l', 'l', 'o')

point = (1, 2, 3)
print(point[0:2])  # (1, 2)

x, y, z = point  # unpacking: one variable per element
if 10 in point:  # membership test works on tuples just like on lists
    print("exists")

Swapping Variables

1
2
3
4
5
6
7
x = 10
y = 11

z = x
x = y
y = z
print(x, y) # 11 10

simpler way:

1
2
x, y = y, x
print(x, y)

实际上是先定义了一个tuple,然后给x, y赋值,即 x, y = (11, 10)

Arrays

1
2
3
4
5
6
7
8
from array import array

numbers = array("i", [1, 2, 3])
# numbers[0] = 1.0 # TypeError: integer argument expected, got float

list1 = [1.0, 1, 2, 3]
print(type(list1[0])) # <class 'float'>
print(type(list1[1])) # <class 'int'>

arrays 可以调用 append, pop, index 等与 lists 相同的方法 (methods),但是所有元素只能是同一种数据类型(由参数 typecode 如 "i" 指定)

而一个 list 里面可以有多种数据类型:

1
2
3
list1 = [1.0, 1, 2, 3]
list1.append("hello")
print(list1) # [1.0, 1, 2, 3, 'hello']

Sets

一个 set 内不能有重复项

1
2
3
numbers = [1, 1, 2, 3, 4]
first = set(numbers)
print(first) # {1, 2, 3, 4}
1
2
3
4
5
6
7
second = {1, 4}
second.add(5)
print(second) # {1, 4, 5}

second.remove(5)
len(second)
print(second) # {1, 4}

Demo

1
2
3
4
5
6
7
8
9
numbers = [1, 1, 2, 3, 4]
first = set(numbers)

second = {1, 5}

print(first | second) # 在first或在second
print(first & second) # 在first也在second
print(first - second) # 在first不在second
print(first ^ second) # 在first或在second但不同时在两个set

输出结果:

1
2
3
4
{1, 2, 3, 4, 5}
{1}
{2, 3, 4}
{2, 3, 4, 5}

set是无序的,不能用index,如first[0]是不合法的

1
2
if 1 in first:  # check whether the set contains an element
    print("yes")

Dictionary

创建

1
2
3
# Two ways to create a dictionary
point = {"x": 1, "y": 2}
point = dict(x=1, y=2)

增改

1
2
3
4
point["x"] = 10		# modify element
print(point) # {'x': 10, 'y': 2}
point["z"] = 20 # create new element
print(point) # {'x': 10, 'y': 2, 'z': 20}

获取

1
2
print(point.get("a"))			# None
print(point.get("a", 0)) # 0 (若没有指定的key,则返回指定值0)

删除

1
2
del point["x"]		# 删除元素
print(point) # {'y': 2, 'z': 20}

for loop

1
2
3
4
5
6
7
8
# Iterating a dict directly yields its keys
for key in point:
    print(key, point[key])

# items() yields one (key, value) tuple per entry
for item in point.items():
    print(item)

# ...which can be unpacked directly in the loop header
for key, value in point.items():
    print(key, value)

返回:

1
2
3
4
5
6
y 2
z 20
('y', 2)
('z', 20)
y 2
z 20

Dictionary Comprehensions

1
2
3
4
5
# values = []
# for x in range(5):
#     values.append(x * 2)

values = [x * 2 for x in range(5)] # same result as the commented-out loop above

set、dictionary 的:

1
2
3
4
5
values = {x * 2 for x in range(5)}
print(values) # {0, 2, 4, 6, 8}

values = {x: x * 2 for x in range(5)}
print(values) # {0: 0, 1: 2, 2: 4, 3: 6, 4: 8}

Generators

用于有大量(无限)元素,要节省内存空间的情形

1
2
3
4
5
6
7
from sys import getsizeof

values = (x * 2 for x in range(100000)) # generator
print("gen:", getsizeof(values))

values = [x * 2 for x in range(100000)] # list
print("list:", getsizeof(values))

输出:

1
2
gen: 112
list: 800984

对于 generator object, 无论有多少元素,sys.getsizeof 的结果都是 112 字节(就算只有一个元素也是;具体数值因 Python 版本而异)。generator object 在迭代时才生成元素,而不是将所有的元素都存在内存中。

Unpacking Operator

Demo 1

1
2
3
numbers = [1, 2, 3]
print(numbers) # [1, 2, 3]
print(*numbers) # 1 2 3

numbers 是一个list, 而 *numbers 是 unpack 之后的3个独立的数

Demo 2

1
2
3
values = list(range(5))
values = [*range(5), *"Hello"]
print(values) # [0, 1, 2, 3, 4, 'H', 'e', 'l', 'l', 'o']

Demo 3

组合两个lists:

1
2
3
4
first = [1, 2]
second = [3]
values = [*first, "a", *second, *"Hello"]
print(values) # [1, 2, 'a', 3, 'H', 'e', 'l', 'l', 'o']

Demo 4

组合两个字典:

1
2
3
4
first = {"x": 1}
second = {"x": 10, "y": 2}
combined = {**first, **second, "z": 1}
print(combined) # {'x': 10, 'y': 2, 'z': 1}

相同 key 值的元素 (“x”) 取最后一个元素的 value (10)

Exercise

得出一个句子中出现频率最高的字符(注意:空格也会被统计在内):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from pprint import pprint

sentence = "This is a common interview question"

# Count how many times each character (spaces included) occurs
char_frequency = {}
for char in sentence:
    if char in char_frequency:
        char_frequency[char] += 1
    else:
        char_frequency[char] = 1
pprint(char_frequency, width=1)  # width=1 forces one entry per line

# Sort the (char, count) pairs by count, highest first. sorted() is stable,
# so among equal counts the character seen first in the sentence stays first.
char_frequency_sorted = sorted(
    char_frequency.items(),
    key=lambda kv: kv[1],
    reverse=True)

print(char_frequency_sorted[0])

输出:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
{' ': 5,
'T': 1,
'a': 1,
'c': 1,
'e': 3,
'h': 1,
'i': 5,
'm': 2,
'n': 3,
'o': 3,
'q': 1,
'r': 1,
's': 3,
't': 2,
'u': 1,
'v': 1,
'w': 1}
('i', 5)

我的改进:

1
2
3
4
5
6
7
8
9
10
11
12
# Original version, kept for comparison:
# char_frequency = {}
# for char in sentence:
#     if char in char_frequency:
#         char_frequency[char] += 1
#     else:
#         char_frequency[char] = 1
# pprint(char_frequency, width=1)  # one entry per line

char_frequency = {}
for char in sentence:
    # get(char, 0) returns 0 the first time a character is seen,
    # which replaces the explicit if/else above
    char_frequency[char] = char_frequency.get(char, 0) + 1
pprint(char_frequency, width=1)