# Submitted by: Hackston
import csv
import os

import requests
import xlsxwriter
from lxml import html

# Author: Pablo Rotem
# Developer URL: https://pablo-guides.com

# Separator used when a single XPath expression matches several nodes.
_MATCH_SEPARATOR = " | "


def _node_to_text(node):
    """Return a printable string for one XPath match.

    XPath can yield elements, attribute/text strings, numbers or booleans.
    Elements are reduced to their text content; anything else is stringified.
    """
    text_content = getattr(node, "text_content", None)
    if text_content is None:
        # Strings, numbers and booleans produced by attribute/text()/function
        # expressions have no ``text_content`` method.
        return str(node).strip()

    text = text_content().strip()
    if text:
        return text

    # Elements without text (e.g. <img>) usually carry their payload in an
    # attribute, so fall back to the common link-like attributes.
    for attribute in ("src", "href"):
        value = node.get(attribute)
        if value:
            return value
    return ""


def _extract_fields(tree, xpaths):
    """Evaluate every expression in *xpaths* against *tree*.

    Returns a dict mapping ``field_<index>`` to the matched text, where
    multiple matches of one expression are joined with ``_MATCH_SEPARATOR``.
    """
    row = {}
    for index, xpath in enumerate(xpaths):
        matches = tree.xpath(xpath)
        if not isinstance(matches, list):
            # Expressions such as count(...) return a scalar, not a list.
            matches = [matches]
        row[f"field_{index}"] = _MATCH_SEPARATOR.join(
            _node_to_text(match) for match in matches
        )
    return row


def _write_rows(rows, field_names, output_file, output_type):
    """Write *rows* to *output_file* as CSV, XLSX or comma-joined text."""
    if output_type == "csv":
        with open(output_file, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(rows)
    elif output_type == "xlsx":
        # The context manager closes (and thereby saves) the workbook even if
        # writing a row raises.
        with xlsxwriter.Workbook(output_file) as workbook:
            worksheet = workbook.add_worksheet()
            for row_num, row in enumerate(rows):
                worksheet.write_row(
                    row_num, 0, [row[name] for name in field_names]
                )
    else:
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(
                ", ".join(row[name] for name in field_names) + "\n"
                for row in rows
            )


def scrape_site(url_list, xpaths, output_file, output_type, *, timeout=30):
    """Fetch each URL, evaluate the XPath expressions and save the results.

    Args:
        url_list: Iterable of URLs to download.
        xpaths: Sequence of XPath expressions; expression ``i`` fills the
            column named ``field_i``.
        output_file: Path of the file to write.
        output_type: ``'csv'`` or ``'xlsx'``; any other value produces a
            plain-text file with one comma-separated line per page.
        timeout: Seconds to wait for each HTTP request before giving up.

    A URL that cannot be fetched or parsed is reported on stdout and skipped,
    so one bad page does not abort the whole run.
    """
    field_names = [f"field_{index}" for index, _ in enumerate(xpaths)]
    output_data = []

    for url in url_list:
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            tree = html.fromstring(response.content)
            output_data.append(_extract_fields(tree, xpaths))
        except Exception as e:
            # Deliberate best-effort boundary: network, HTTP, parse and XPath
            # errors are all reported per URL and the loop continues.
            print(f"Error scraping {url}: {e}")

    _write_rows(output_data, field_names, output_file, output_type)


def main():
    """Run the scraper with the built-in example configuration."""
    xpaths = [
        '//*[@id="search"]/div[1]/div[1]/div/span[1]/div[1]/div[2]/div/div/span/div/div/div[2]/span/a/div/img',
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/header/div[1]/div[3]',
    ]
    url_list = [
        "https://google.com",
    ]
    # NOTE: the file name keeps its original ".txt" suffix although the
    # selected output type is CSV.
    output_file = "urls.txt"
    output_type = "csv"
    scrape_site(url_list, xpaths, output_file, output_type)


if __name__ == "__main__":
    main()