# Submitted by: דניאל
import requests
from lxml import html
import os
import csv
import xlsxwriter

# Author: Pablo Rotem
# Developer URL: https://pablo-guides.com


def scrape_site(url_list, xpaths, output_file, output_type):
    """Scrape each URL, extract one text value per XPath, and write the rows out.

    Args:
        url_list: Iterable of page URLs to fetch.
        xpaths: List of XPath expressions; result row i gets keys
            'field_0' .. 'field_{len(xpaths)-1}' in the same order.
        output_file: Destination path.
        output_type: 'csv', 'xlsx', or anything else for a plain-text dump.

    Failed URLs are reported to stdout and skipped; the remaining rows are
    still written.
    """
    output_data = []
    for url in url_list:
        try:
            # Timeout so one dead host cannot hang the whole run; raise on
            # HTTP error statuses instead of parsing an error page.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            tree = html.fromstring(response.content)

            extracted_data = {}
            # Drive extraction from the xpaths parameter (the original
            # hard-coded the same expressions and ignored the argument).
            for index, xpath in enumerate(xpaths):
                matches = tree.xpath(xpath)
                if not matches:
                    value = ''
                elif isinstance(matches[0], str):
                    # XPath can yield plain strings (attribute/text results).
                    value = matches[0].strip()
                else:
                    value = matches[0].text_content().strip()
                extracted_data[f'field_{index}'] = value
            output_data.append(extracted_data)
        except Exception as e:
            # Best-effort batch scrape: report and continue with other URLs.
            print(f'Error scraping {url}: {e}')

    # One distinct header per extracted field. (The original built the
    # duplicate header ['field_5', 'field_5', ...], which made DictWriter
    # reject every row.)
    fieldnames = [f'field_{index}' for index in range(len(xpaths))]

    if output_type == 'csv':
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(output_data)
    elif output_type == 'xlsx':
        workbook = xlsxwriter.Workbook(output_file)
        worksheet = workbook.add_worksheet()
        for row_num, data in enumerate(output_data):
            worksheet.write_row(row_num, 0, list(data.values()))
        workbook.close()
    else:
        with open(output_file, 'w', encoding='utf-8') as f:
            for row in output_data:
                # '\n' — the original wrote the literal character 'n'.
                f.write(', '.join(str(v) for v in row.values()) + '\n')


def main():
    """Scrape a Truecaller result page and save the extracted fields as CSV."""
    # Original used PHP array(0 => ...) syntax, which is not valid Python.
    xpaths = [
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/header/div[1]/div[3]',
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/header/div[2]/img',
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/div[2]/a[2]/div/div[2]',
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/div[2]/a[1]/div/div[2]',
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/div[2]/a[3]/div/div[2]',
        '//*[@id="__nuxt"]/div/main/div[2]/div/div[2]/div/div/div[2]/header/div[1]/div[4]',
    ]
    url_list = [
        'https://www.truecaller.com/search/il/0505774177',
    ]
    output_file = '050.txt'
    output_type = 'csv'
    scrape_site(url_list, xpaths, output_file, output_type)


if __name__ == '__main__':
    main()