Mosh的课程网址
# Web Scraping
# Fetch the Stack Overflow question list and print, for every question
# summary found, its title and its vote count.
import requests
from bs4 import BeautifulSoup

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get("https://stackoverflow.com/questions", timeout=10)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")
questions = soup.select(".question-summary")

# select() returns an empty list when the page markup does not contain the
# class, so indexing [0] unguarded would raise IndexError.
if questions:
    print(questions[0].attrs)
    print(questions[0].get("id", 0))
else:
    print("No element matched '.question-summary'")

for question in questions:
    title = question.select_one(".question-hyperlink")
    votes = question.select_one(".vote-count-post")
    # select_one() returns None when nothing matches; skip incomplete entries
    # rather than crash on None.getText().
    if title is None or votes is None:
        continue
    print(title.getText())
    print(votes.getText())
{'class': ['question-summary'], 'id': 'question-summary-67532680'}
question-summary-67532680
GKE Nginx Ingress Controller Oauth2 Proxy redirect
0
Workaround on nested async completion blocks from network calls? Without using PromiseKit
0
ax.text not being printed when using transform
0
Using html & javascript Populate textbox
0
How to pass JSON data through the Django backend to frontend view using Angular
0
Gitlab CI cannot pull private registry with DOCKER_AUTH_CONFIG
0
Shopify SKU Lookup using GraphQL
0
Browser Automation
自动打开 Chrome,跳转到 github.com 并登录。
# Open Chrome, go to github.com, sign in, and check that the user name shows
# up on the resulting page.
import getpass
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Credentials must not live in source control: take them from the environment
# and fall back to an interactive prompt for the password.
username = os.environ.get("GITHUB_USERNAME", "PurpleMStone")
password = os.environ.get("GITHUB_PASSWORD") or getpass.getpass("GitHub password: ")

browser = webdriver.Chrome()
try:
    browser.set_window_size(1920, 1080)
    browser.get("https://github.com")

    # find_element(By.<strategy>, value) replaces the find_element_by_*
    # helpers, which newer Selenium releases no longer provide.
    signin_link = browser.find_element(By.LINK_TEXT, "Sign in")
    signin_link.click()

    username_box = browser.find_element(By.ID, "login_field")
    username_box.send_keys(username)
    password_box = browser.find_element(By.ID, "password")
    password_box.send_keys(password)
    password_box.submit()

    # Smoke check: the signed-in page is expected to contain the user name.
    assert username in browser.page_source
    time.sleep(0.5)
finally:
    # Always shut the browser down, even when a lookup or the check fails,
    # so no Chrome/driver process is left behind.
    browser.quit()
Working with PDFs Demo 1
# Read first.pdf, print its page count, rotate the first page 90 degrees
# clockwise and save that single page as rotated.pdf.
# NOTE(review): PdfFileReader / getPage / rotateClockwise / PdfFileWriter are
# the legacy PyPDF2 names; confirm the installed PyPDF2 version still has them.
import PyPDF2

with open("first.pdf", "rb") as file:
    reader = PyPDF2.PdfFileReader(file)
    print(reader.numPages)

    page = reader.getPage(0)
    page.rotateClockwise(90)

    writer = PyPDF2.PdfFileWriter()
    writer.addPage(page)

    # Write while the source file is still open: the page object comes from
    # `reader`, which presumably still reads from `file` when writing.
    with open("rotated.pdf", "wb") as output:
        writer.write(output)
Demo 2: 合并PDF
# Concatenate first.pdf and second.pdf, in that order, into combined.pdf.
import PyPDF2

merger = PyPDF2.PdfFileMerger()
file_names = ["first.pdf", "second.pdf"]
try:
    for file_name in file_names:
        merger.append(file_name)
    merger.write("combined.pdf")
finally:
    # Release the input/output file handles the merger holds, even if an
    # append or the write fails.
    merger.close()
# Excel Spreadsheets
# Load transactions.xlsx, show several ways of addressing cells, append one
# row and save the result under a new file name.
import openpyxl

wb = openpyxl.load_workbook("transactions.xlsx")
print(wb.sheetnames)

sheet = wb["Sheet1"]
cell = sheet["a1"]  # single cell addressed by its coordinate string

print(sheet.max_row)
print(sheet.max_column)

# Visit every cell by (row, column) number; both ranges start at 1.
for row in range(1, sheet.max_row + 1):
    for column in range(1, sheet.max_column + 1):
        cell = sheet.cell(row, column)

column = sheet["a"]  # a whole column
print(column)
cells = sheet["a:c"]  # a range of columns
print(cells)
print(sheet[1:3])  # a range of rows

# Add a new row after the existing data and write to a different file so the
# source workbook is left untouched.
sheet.append([1, 2, 3])
wb.save("transaction2.xlsx")
# Numpy
# Build a 2x3 array from nested lists and inspect its type and shape.
import numpy as np

array = np.array([[1, 2, 3], [4, 5, 6]])
print(array)
# Inspect the array object itself; type(np.array) would report the type of
# the factory function instead.
print(type(array))
print(array.shape)
# Create 3x4 integer arrays pre-filled with zeros, with ones, and with a
# chosen constant (5). `np` is the numpy module.
array = np.zeros((3, 4), dtype=int)
print(array)

array = np.ones((3, 4), dtype=int)
print(array)

array = np.full((3, 4), 5, dtype=int)
print(array)
# Fill a 3x4 array with random floats, then show indexing, boolean masks and
# a few whole-array functions. `np` is the numpy module.
array = np.random.random((3, 4))
print(array)
print(array[0, 0])  # single element at row 0, column 0

print(array > 0.2)  # element-wise comparison gives a boolean array
print(array[array > 0.2])  # boolean mask keeps only the matching elements

print(np.sum(array))
print(np.floor(array))
print(np.ceil(array))
print(np.round(array))
# Adding two arrays of the same shape adds them element by element.
# `np` is the numpy module.
first = np.array([1, 2, 3])
second = np.array([1, 2, 3])
print(first + second)