# Submitted by: דניאל
import csv
import os

import requests
import xlsxwriter
from lxml import html

# Author: Pablo Rotem
# Developer URL: https://pablo-guides.com

# Seconds to wait for a server response before giving up on a URL.
REQUEST_TIMEOUT = 30

# Separator used when a single XPath matches more than one node.
MATCH_SEPARATOR = '; '


def _match_to_text(match):
    """Return a plain string for a single XPath match.

    XPath queries may yield strings (attributes, ``text()``), elements,
    or scalar values; every kind is reduced to text so that all writers
    can serialize it.
    """
    if isinstance(match, str):
        return match.strip()
    if hasattr(match, 'text_content'):
        return match.text_content().strip()
    return str(match)


def _extract_field(tree, xpath):
    """Evaluate *xpath* against *tree* and return the matches as one string."""
    result = tree.xpath(xpath)
    # XPath functions such as count() or string() return a scalar, not a list.
    if not isinstance(result, list):
        result = [result]
    return MATCH_SEPARATOR.join(_match_to_text(match) for match in result)


def _write_output(rows, field_names, output_file, output_type):
    """Write *rows* (dicts keyed by *field_names*) to *output_file*.

    ``'csv'`` writes a header plus one line per row, ``'xlsx'`` writes one
    worksheet row per scraped page, and any other value writes
    comma-separated plain text lines.
    """
    if output_type == 'csv':
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(rows)
    elif output_type == 'xlsx':
        # The context manager closes (and thereby saves) the workbook even
        # if writing a row fails.
        with xlsxwriter.Workbook(output_file) as workbook:
            worksheet = workbook.add_worksheet()
            for row_num, row in enumerate(rows):
                worksheet.write_row(
                    row_num, 0, [row[name] for name in field_names]
                )
    else:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(
                ', '.join(row[name] for name in field_names) + '\n'
                for row in rows
            )


def scrape_site(url_list, xpaths, output_file, output_type):
    """Scrape every URL with every XPath and save the results.

    Args:
        url_list: Iterable of page URLs to download.
        xpaths: Sequence of XPath expressions; the result of the i-th
            expression is stored under the column ``field_<i>``.
        output_file: Destination path. When empty, ``output.<ext>`` in the
            current directory is used, because an empty path cannot be
            opened.
        output_type: ``'csv'``, ``'xlsx'``, or anything else for plain text.

    A URL that fails to download or parse is reported on stdout and
    skipped; it does not abort the run.
    """
    if not output_file:
        extension = output_type if output_type in ('csv', 'xlsx') else 'txt'
        output_file = f'output.{extension}'

    field_names = [f'field_{index}' for index in range(len(xpaths))]
    output_data = []
    for url in url_list:
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            tree = html.fromstring(response.content)
            extracted_data = {
                name: _extract_field(tree, xpath)
                for name, xpath in zip(field_names, xpaths)
            }
        except Exception as e:
            # Deliberate best-effort boundary: one bad page must not stop
            # the remaining URLs from being scraped.
            print(f'Error scraping {url}: {e}')
        else:
            output_data.append(extracted_data)

    _write_output(output_data, field_names, output_file, output_type)


def main():
    """Run the scraper with the generated configuration."""
    xpaths = [
        '//*[@id="mount_0_0_4Y"]/div/div[1]/div[1]/div/div[3]/div/div/div[2]/div[1]/div[4]/div/div/div/div/div/div/div/div/div/div/div[2]/div[1]/div/div/div[1]/span/span/a/div[1]/svg/g/image',
    ]
    url_list = [
        'https://pablo-guides.com/',
    ]
    # Left empty by the generator; scrape_site falls back to a default name.
    output_file = ''
    output_type = 'csv'
    scrape_site(url_list, xpaths, output_file, output_type)


if __name__ == '__main__':
    main()