A website may have N addresses. For example, selecting a category entry produces a URL address. So how can we traverse them all and check whether they are loading normally? Here we can query the database and use simple rules to automatically splice URLs together, then traverse all the links. The code is shared below.
1: Create a headless browser
2: Connect to the database
3: Call the Feishu method, copy it directly and then modify the Feishu robot address. It is universal (DingTalk and SMS also have their own encapsulated methods)
4: Create a method (check_entries) to check whether the web page is loading normally. Here I simulate a user opening the browser and performing a scrolling action, then check whether the page contains the content I want. If the expected content is exposed, the page is considered normal; otherwise it is treated as failed and an alarm is sent (you can modify this check or write whatever you need according to your own requirements).
5: Check the availability of a set of URL addresses and send the results through Feishu messages. Specific steps are as follows:
(1) Define a dictionary url_sql_dict, where the key is the URL prefix and the value is the corresponding SQL query statement.
(2) Traverse each URL prefix and SQL query statement in the dictionary.
(3) If the SQL query statement is empty, directly check whether the URL address is normal. If it is normal, print “Address is normal”; otherwise, print “URL is abnormal” and send an alarm message through Feishu message.
(4) If the SQL query statement is not empty, execute the SQL query statement and obtain the query results.
(5) Traverse the query result list and print field values.
(6) Determine whether the link already contains the .html suffix. If included, directly check whether the URL address is normal; otherwise, splice part of the URL link path and add the .html suffix.
(7) If the URL address is normal, print “Address Normal”; otherwise, print “URL Abnormal” and send an alarm message through Feishu messages.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import urllib.parse
import time
import requests
import json
import pymysql  # fixed: original had "importpymysql" (missing space -> SyntaxError)

# --- Headless browser setup -------------------------------------------------
# Create a Chrome options object and enable headless mode.
options = Options()
options.add_argument("--headless")     # run Chrome without a visible window
options.add_argument("--disable-gpu")  # disable GPU acceleration

# Create the webdriver object (the driver binary must be on PATH).
driver = webdriver.Chrome(options=options)

# --- Database connection ----------------------------------------------------
conn = pymysql.connect(
    host='rm-uf6mp542XXXXXXXXXXo.mysql.rds.aliyuncs.com',  # database host
    user='root',          # database account
    password='XXXXXXXXXX',  # database password
    db='XXXXXXXXXX',      # schema name
    charset='utf8mb4',
)


class FeishuTalk:
    """Minimal Feishu (Lark) webhook robot used to deliver alarm messages.

    DingTalk and SMS channels have their own similarly-shaped wrappers;
    to reuse this one, only the webhook address needs to change.
    """

    # Feishu robot webhook address
    chatGPT_url = 'https://open.feishu.cn/open-apis/bot/v2/hook/6ee0be57-847a-46a5-95e5-bbee62289b28'

    def sendTextmessage(self, content):
        """POST a plain-text message to the webhook and return the parsed JSON reply."""
        url = self.chatGPT_url
        headers = {
            "Content-Type": "application/json; charset=utf-8",
        }
        payload_message = {
            "msg_type": "text",
            "content": {
                # @ a single user: <at user_id="ou_xxx">Name</at>
                # fixed: the original nested unescaped double quotes broke the literal
                "text": content + '<at user_id="bf888888">XXX</at>',
                # @ everyone: <at user_id="all">Everyone</at>
                # "text": content + '<at user_id="all">test</at>'
            },
        }
        response = requests.post(url=url, data=json.dumps(payload_message), headers=headers)
        # fixed: original returned the bound method (response.json) instead of calling it
        return response.json()


def check_entries(driver, url):
    """Open *url*, scroll down, and report whether the exposure beacon fired.

    The page is considered healthy when one of the md.XXXX.com gif tracking
    beacons was requested with "bhv_type=exposure" in its (URL-decoded)
    request URL.

    :param driver: a live selenium WebDriver instance.
    :param url: absolute URL to load.
    :return: True when an exposure beacon is found; False when the scan finds
             none or the page load times out.
    """
    try:
        driver.set_page_load_timeout(15)
        driver.get(url)
        time.sleep(2)  # give the page time to finish loading
        driver.maximize_window()  # enlarge the browser window
        time.sleep(1)
        # Scroll down 1500 pixels so below-the-fold content gets exposed.
        js = "var q=document.documentElement.scrollTop=1500"
        driver.execute_script(js)
        time.sleep(2)
        # Inspect every network request recorded by the Performance API.
        entries = driver.execute_script("return window.performance.getEntries()")
        beacons = (
            'https://md.XXXX.com/s.gif',
            'https://md.XXXX.com/c.gif',
            'https://md.XXXX.com/i.gif',
        )
        for entry in entries:
            # Decode the URL to restore %xx-escaped special characters.
            entry_url = urllib.parse.unquote(entry['name'])
            if any(b in entry['name'] for b in beacons) and "bhv_type=exposure" in entry_url:
                return True
        # fixed: the original wrapped this scan in a `while True` plus a flag
        # that always exited after one pass, and had an unreachable
        # driver.quit() that would have killed the shared driver.
        return False
    except TimeoutException:
        return False


if __name__ == '__main__':
    try:
        with conn.cursor() as cursor:
            # URL prefix -> SQL query that yields the path suffixes to append.
            # An empty SQL string means "check the bare prefix itself".
            url_sql_dict = {
                'https://www.XXXX.com/': "",
                'https://XXXX.com/XXXX/': "SELECT field FROM table name condition ",
                'https://BB./BB.com//BB/': "SELECT field FROM table name condition ",
            }
            for url_prefix, sql in url_sql_dict.items():
                if sql == "":
                    # No query: check whether the prefix URL itself is normal.
                    url = url_prefix
                    try:
                        if check_entries(driver, url):
                            print(url, 'Address is normal')
                        else:
                            print("URL exception")
                            content = 'URL exception, please check ' + url
                            FeishuTalk().sendTextmessage(content)  # send Feishu alarm
                    except Exception as e:
                        print("URL check error:", e)
                        content = 'URL check error, please check network connection and driver settings'
                        FeishuTalk().sendTextmessage(content)
                else:
                    # Query the fields that form the partial path of the URL.
                    cursor.execute(sql)
                    results = cursor.fetchall()
                    for result in results:
                        print(result[0])  # print the field value
                        # Append the ".html" suffix only when the path does
                        # not already carry it.
                        if result[0].endswith('.html'):
                            url = url_prefix + result[0]
                        else:
                            url = url_prefix + result[0] + '.html'
                        try:
                            if check_entries(driver, url):
                                print(url, 'Address is normal')
                            else:
                                print("URL exception")
                                content = 'URL exception, please check ' + url
                                FeishuTalk().sendTextmessage(content)  # send Feishu alarm
                        except Exception as e:
                            print("URL check error:", e)
                            content = 'URL check error, please check network connection and driver settings'
                            FeishuTalk().sendTextmessage(content)
    except Exception as e:
        print("Query error:", e)
        content = 'Query error, please check the database connection and SQL statement'
        FeishuTalk().sendTextmessage(content)
    finally:
        # fixed: original called driver.quit() twice; release both resources
        # exactly once, whatever happened above.
        conn.close()
        driver.quit()