0%

Mosh的Python课程笔记(11)--Popular Python Packages

Mosh的课程网址

Web Scraping

1
2
3
4
5
6
7
8
9
10
11
12
13
import requests
from bs4 import BeautifulSoup

# Download the question listing. The timeout keeps the script from hanging on
# a stalled connection, and raise_for_status() stops on an HTTP error instead
# of parsing an error page.
response = requests.get("https://stackoverflow.com/questions", timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Select every element carrying the "question-summary" CSS class.
questions = soup.select(".question-summary")

# Inspect the first match only when there is one; an empty result would
# otherwise raise IndexError.
if questions:
    print(questions[0].attrs)
    print(questions[0].get("id", 0))

# Print the title and the vote count of each question.
for question in questions:
    title = question.select_one(".question-hyperlink")
    votes = question.select_one(".vote-count-post")
    if title is None or votes is None:
        # select_one() returns None when nothing matches; skip such entries
        # rather than crash on None.getText().
        continue
    print(title.getText())
    print(votes.getText())

{'class': ['question-summary'], 'id': 'question-summary-67532680'}
question-summary-67532680
GKE Nginx Ingress Controller Oauth2 Proxy redirect
0
Workaround on nested async completion blocks from network calls? Without using PromiseKit
0
ax.text not being printed when using transform
0
Using html & javascript Populate textbox
0
How to pass JSON data through the Django backend to frontend view using Angular
0
Gitlab CI cannot pull private registry with DOCKER_AUTH_CONFIG
0
Shopify SKU Lookup using GraphQL
0

Browser Automation

自动打开chrome,跳转到github.com,登录。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Read the credentials from the environment so that no secret is stored in the
# source. GITHUB_PASSWORD is required; a missing value raises KeyError.
username = os.environ.get("GITHUB_USERNAME", "PurpleMStone")
password = os.environ["GITHUB_PASSWORD"]

browser = webdriver.Chrome()
try:
    browser.set_window_size(1920, 1080)  # set the window size

    browser.get("https://github.com")

    # Open the login form.
    signin_link = browser.find_element(By.LINK_TEXT, "Sign in")
    signin_link.click()

    # Fill in the credentials and submit the form the password box belongs to.
    username_box = browser.find_element(By.ID, "login_field")
    username_box.send_keys(username)
    password_box = browser.find_element(By.ID, "password")
    password_box.send_keys(password)
    password_box.submit()

    # The user name appearing in the resulting page is taken as proof of login.
    assert username in browser.page_source

    time.sleep(0.5)

    # Alternative check against a specific element instead of the whole page:
    # profile_link = browser.find_element(By.CLASS_NAME, "user-profile-link")
    # link_label = profile_link.get_attribute("innerHTML")
    # assert username in link_label
finally:
    # Always close the browser, even when a step above fails.
    browser.quit()

Working with PDFs

Demo 1

1
2
3
4
5
6
7
8
9
10
11
12
import PyPDF2

# Rotate the first page of first.pdf and save it as a one-page rotated.pdf.
# The writer is used inside the outer "with" so the source file is still open
# while the page is written out.
with open("first.pdf", "rb") as file:
    reader = PyPDF2.PdfFileReader(file)
    print(reader.numPages)
    page = reader.getPage(0)  # get the first page
    page.rotateClockwise(90)  # rotate the page 90 degrees clockwise

    writer = PyPDF2.PdfFileWriter()
    writer.addPage(page)
    with open("rotated.pdf", "wb") as output:
        writer.write(output)

Demo 2: 合并PDF

1
2
3
4
5
6
7
import PyPDF2

# Concatenate the listed PDF files, in order, into combined.pdf.
merger = PyPDF2.PdfFileMerger()
file_names = ["first.pdf", "second.pdf"]
try:
    for file_name in file_names:
        merger.append(file_name)
    merger.write("combined.pdf")
finally:
    # Release the file handles held by the merger, even if a step fails.
    merger.close()

Excel Spreadsheets

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import openpyxl

# Load an existing workbook and list the names of its sheets.
wb = openpyxl.load_workbook("transactions.xlsx")
print(wb.sheetnames)

sheet = wb["Sheet1"]

# Adding / removing sheets (title and insertion index are separate arguments):
# wb.create_sheet("Sheet2", 0)
# wb.remove(sheet)  # remove_sheet() is the older, deprecated spelling

# A single cell can be addressed by its coordinate string.
cell = sheet["a1"]
# print(cell.row)
# print(cell.column)
# print(cell.coordinate)
# cell = sheet.cell(row=1, column=1)

print(sheet.max_row)
print(sheet.max_column)

# Visit every cell; the ranges start at 1 and include max_row / max_column.
for row in range(1, sheet.max_row + 1):
    for column in range(1, sheet.max_column + 1):
        cell = sheet.cell(row, column)
        # print(cell.value)

column = sheet["a"]
print(column)
cells = sheet["a:c"]  # column
print(cells)

print(sheet[1:3])  # row

sheet.append([1, 2, 3])  # add a row at the end of the sheet
# sheet.insert_rows insert_columns, delete_rows, delete_columns

# Save under a new name so the input workbook is left untouched.
wb.save("transaction2.xlsx")

Numpy

1
2
3
4
5
6
import numpy as np

# Build a 2x3 array from nested lists, then show its contents, type and shape.
array = np.array([[1, 2, 3], [4, 5, 6]])
print(array)
# Inspect the array object itself; type(np.array) would report the type of the
# factory function instead.
print(type(array))
print(array.shape)
1
2
3
4
5
6
7
8
# Build three 3x4 integer arrays filled with 0, 1 and 5 respectively and
# print each one; `array` ends up bound to the last of them.
for array in (
    np.zeros((3, 4), dtype=int),
    np.ones((3, 4), dtype=int),
    np.full((3, 4), 5, dtype=int),
):
    print(array)
1
2
3
4
5
6
7
8
9
10
11
12
# Fill a 3x4 array with random floats and show the whole array plus its
# top-left element.
array = np.random.random(size=(3, 4))
print(array)
print(array[0, 0])

# An element-wise comparison yields a boolean array of the same shape ...
above_threshold = array > 0.2
print(above_threshold)

# ... which can be used as an index to keep only the matching elements.
print(array[above_threshold])

# Total of all elements, followed by three element-wise rounding variants.
print(array.sum())
for rounding in (np.floor, np.ceil, np.round):
    print(rounding(array))
1
2
3
# Adding two equally-shaped arrays sums them element by element.
first = np.array([1, 2, 3])
second = np.array([1, 2, 3])
total = np.add(first, second)
print(total)