Submitted by: דניאל

import requests
from lxml import html
import os
import csv
import xlsxwriter

# Author: Pablo Rotem
# Developer URL: https://pablo-guides.com

def scrape_site(url_list, xpaths, output_file, output_type):
    output_data = []
    for url in url_list:
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            tree = html.fromstring(response.content)
            extracted_data = {}

            # Evaluate each configured XPath. Store the first match as text
            # (the image URL for <img> nodes), or an empty string if the
            # expression matched nothing. Note: cssselect() takes CSS
            # selectors, not XPath, so it cannot serve as a fallback here.
            for index, xpath in enumerate(xpaths):
                matches = tree.xpath(xpath)
                if not matches:
                    extracted_data[f'field_{index}'] = ''
                elif matches[0].tag == 'img':
                    extracted_data[f'field_{index}'] = matches[0].get('src', '')
                else:
                    extracted_data[f'field_{index}'] = matches[0].text_content().strip()

            output_data.append(extracted_data)
        except Exception as e:
            print(f'Error scraping {url}: {e}')

    if output_type == 'csv':
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            # One column per configured XPath: field_0, field_1, ...
            writer = csv.DictWriter(f, fieldnames=[f'field_{index}' for index in range(len(xpaths))])
            writer.writeheader()
            writer.writerows(output_data)
    elif output_type == 'xlsx':
        workbook = xlsxwriter.Workbook(output_file)
        worksheet = workbook.add_worksheet()
        for row_num, data in enumerate(output_data):
            worksheet.write_row(row_num, 0, data.values())
        workbook.close()
    else:
        with open(output_file, 'w', encoding='utf-8') as f:
            for row in output_data:
                f.write(', '.join(row.values()) + '\n')

def main():
    xpaths = [
        '//*[@id="search"]/div[1]/div[1]/div/span[1]/div[1]/div[2]/div/div/span/div/div/div[2]/span/a/div/img',
        '//*[@id="search"]/div[1]/div[1]/div/span[1]/div[1]/div[2]/div/div/span/div/div/div[3]/div[1]/h2/a/span',
        '//*[@id="search"]/div[1]/div[1]/div/span[1]/div[1]/div[2]/div/div/span/div/div/div[3]/div[3]/div/div[1]/a/span/span[2]/span[2]',
    ]
    url_list = [
        'https://www.amazon.com/s?i=specialty-aps&bbn=16225007011&rh=n%3A16225007011%2Cn%3A172456&language=he&ref=nav_em__nav_desktop_sa_intl_computer_accessories_and_peripherals_0_2_6_2',
    ]
    output_file = 'amazon1.csv'
    output_type = 'csv'
    scrape_site(url_list, xpaths, output_file, output_type)

if __name__ == '__main__':
    main()
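
Note: Amazon commonly answers the default python-requests User-Agent with a
CAPTCHA page or HTTP 503, so the requests above may not return the product
markup these XPaths expect. Below is a minimal sketch of a fetch helper that
sends browser-like headers; the header values are illustrative assumptions,
not something the original script defines.

import requests

# Illustrative header values (assumption); adjust to your environment.
BROWSER_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'Accept-Language': 'en-US,en;q=0.9',
}

def fetch(url, timeout=30):
    # Send browser-like headers and fail fast on a non-2xx response,
    # so a blocked request surfaces as an error instead of empty fields.
    response = requests.get(url, headers=BROWSER_HEADERS, timeout=timeout)
    response.raise_for_status()
    return response

Swapping this in for the bare requests.get call in scrape_site leaves the
rest of the flow unchanged.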
        