package com.gxq.model.controller; import cn.hutool.core.thread.ThreadUtil; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import org.openqa.selenium.WebDriver; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriverService; import org.openqa.selenium.chrome.ChromeOptions; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import java.io.File; import java.io.IOException; /** * Get web image Controller * * @author Xiaoqiang * @date July 9, 2023 22:53:17 */ @RestController @Slf4j @AllArgsConstructor @RequestMapping("/test") public class TestController {<!-- --> /** * Get pictures * * @param idnex Which picture? * @return */ @GetMapping("/get-image") public String getImage(Integer index) {<!-- --> String src = null; try {<!-- --> String url = "www.baidu.com"; log.info("Request url====={}",url); //Set the path of Selenium WebDriver File file = new File("C:\chromedriver\chromedriver.exe"); log.info("Selenium WebDriver path===={}","C:\chromedriver\chromedriver.exe"); // Create new service ChromeDriverService service = new ChromeDriverService.Builder().usingDriverExecutable(file).usingAnyFreePort().build(); // Start service service.start(); ChromeOptions chromeOptions = new ChromeOptions(); chromeOptions.addArguments("--headless"); //headless browser chromeOptions.addArguments("--no-sandbox"); //No interface chromeOptions.addArguments("--disable-gpu"); chromeOptions.addArguments("lang=zh_CN.UTF-8"); //Generate a headless browser WebDriver driver = new ChromeDriver(service, chromeOptions); // Create ChromeDriver instance //Open the target page driver.get(url); // Wait for the page to load try {<!-- --> log.info("Waiting for page loading to complete"); Thread.sleep(2000); } catch (InterruptedException e) {<!-- --> e.printStackTrace(); } // Use Jsoup to parse HTML Document doc = Jsoup.parse(driver.getPageSource()); // Get image elements Elements imgElements = doc.select("img"); log.info("Parse the number of image elements in the web page{}====={}",imgElements.size(),imgElements); // Get image URL src = imgElements.get(index).attr("src"); log.info("The address to get the index picture is ====={}",src); // Close ChromeDriver ThreadUtil.execute(new Runnable() {<!-- --> @Override public void run() {<!-- --> try {<!-- --> Thread.sleep(60000); } catch (InterruptedException e) {<!-- --> e.printStackTrace(); } log.info("Close the browser!"); driver.quit(); } }); return src; } catch (IOException e) {<!-- --> log.error("Failed to obtain image!"); e.printStackTrace(); } return "Failed to obtain image!"; } }
Firefox
package com.gxq.model.controller; import cn.hutool.core.thread.ThreadUtil; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import org.openqa.selenium.WebDriver; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriverService; import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.firefox.FirefoxDriver; import org.openqa.selenium.firefox.FirefoxOptions; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import java.io.File; import java.io.IOException; /** * Get web image Controller * * @author Xiaoqiang * @date July 9, 2023 22:53:17 */ @RestController @Slf4j @AllArgsConstructor @RequestMapping("/test") public class Test{<!-- --> /** * Get pictures * * @param index Which picture? * @return */ @GetMapping("/get-image") public String getImage(Integer index) {<!-- --> String src = null; try {<!-- --> String url = String url = "www.baidu.com";; log.info("Request url====={}",url); //Set the path of Selenium WebDriver System.setProperty("webdriver.gecko.driver", "C:\firefox\geckodriver.exe"); log.info("Selenium WebDriver path===={}","C:\firefox\geckodriver.exe"); //Generate a headless browser FirefoxOptions options = new FirefoxOptions(); options.addArguments("--headless"); options.addArguments("--disable-gpu"); // Create FirefoxDriver instance WebDriver driver = new FirefoxDriver(options); //Open the target page driver.get(url); // Wait for the page to load try {<!-- --> log.info("Waiting for page loading to complete"); Thread.sleep(2000); } catch (InterruptedException e) {<!-- --> e.printStackTrace(); } // Use Jsoup to parse HTML Document doc = Jsoup.parse(driver.getPageSource()); // Get image elements Elements imgElements = doc.select("img"); log.info("Parse the number of image elements in the web page{}====={}",imgElements.size(),imgElements); // Get image URL src = imgElements.get(index).attr("src"); log.info("The address to get the index picture is ====={}",src); // Close FirefoxDriver ThreadUtil.execute(new Runnable() {<!-- --> @Override public void run() {<!-- --> try {<!-- --> Thread.sleep(60000); } catch (InterruptedException e) {<!-- --> e.printStackTrace(); } log.info("Close the browser!"); driver.close(); } }); return src; } catch (Exception e) {<!-- --> log.error("Failed to obtain image!"); e.printStackTrace(); } return "Failed to obtain image!"; } }
Driver download: https://liushilive.github.io/github_selenium_drivers/md/Firefox.html