# Submitted by: גל בדיקה
import csv
import os

import requests
import xlsxwriter
from lxml import html

# Author: Pablo Rotem
# Developer URL: https://pablo-guides.com

# Seconds to wait for a server before giving up on a URL; without a timeout
# a single unresponsive host would hang the whole run.
REQUEST_TIMEOUT = 30

# Separator used when one XPath matches several nodes on the same page.
MATCH_SEPARATOR = '; '


def _node_text(node):
    """Return the stripped text of a single XPath result item.

    XPath results may be elements (which expose ``text_content()``) or plain
    values such as strings and numbers (for ``text()``, ``@attr``,
    ``count(...)`` expressions); the latter are converted with ``str``.
    """
    if hasattr(node, 'text_content'):
        return node.text_content().strip()
    return str(node).strip()


def _extract_fields(tree, xpaths):
    """Evaluate every XPath against *tree* and return one output row.

    The row maps ``field_<index>`` to the text of all matches for the XPath
    at that index, joined with ``MATCH_SEPARATOR``. An XPath with no matches
    yields an empty string.
    """
    row = {}
    for index, xpath in enumerate(xpaths):
        matches = tree.xpath(xpath)
        # Scalar expressions (e.g. count(), string()) do not return a list.
        if not isinstance(matches, list):
            matches = [matches]
        row[f'field_{index}'] = MATCH_SEPARATOR.join(
            _node_text(match) for match in matches
        )
    return row


def scrape_site(url_list, xpaths, output_file, output_type):
    """Scrape each URL with the given XPaths and write the rows to a file.

    Args:
        url_list: Iterable of page URLs to download.
        xpaths: Sequence of XPath expressions; expression ``i`` fills the
            column ``field_i`` of every row.
        output_file: Path of the file to create (overwritten if it exists).
        output_type: ``'csv'`` writes a header plus one line per page,
            ``'xlsx'`` writes one worksheet row per page (no header), and
            any other value writes comma-separated plain-text lines.

    URLs that fail to download or parse are reported on stdout and skipped,
    so the output may contain fewer rows than ``url_list`` has entries.
    """
    fieldnames = [f'field_{index}' for index in range(len(xpaths))]
    output_data = []

    for url in url_list:
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            # Treat HTTP error pages as failures instead of parsing them.
            response.raise_for_status()
            tree = html.fromstring(response.content)
            output_data.append(_extract_fields(tree, xpaths))
        except Exception as e:
            # Deliberate best-effort boundary: one bad URL must not abort
            # the remaining ones.
            print(f'Error scraping {url}: {e}')

    if output_type == 'csv':
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(output_data)
    elif output_type == 'xlsx':
        # The context manager closes (and thereby saves) the workbook even
        # if writing a row raises.
        with xlsxwriter.Workbook(output_file) as workbook:
            worksheet = workbook.add_worksheet()
            for row_num, data in enumerate(output_data):
                worksheet.write_row(row_num, 0, list(data.values()))
    else:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(
                ', '.join(row.values()) + '\n' for row in output_data
            )


def main():
    """Run the scraper with the configured XPaths, URLs and output target."""
    xpaths = [
        '//*[@id=":rc3:"]/span[1]/span',
    ]
    # NOTE(review): this page presumably needs a logged-in session and may be
    # rendered client-side; confirm the XPath matches the raw HTML response.
    url_list = [
        'https://www.facebook.com/groups/2653651874928924/members',
    ]
    output_type = 'csv'
    # An empty path makes open() fail, so derive a default name from the type.
    output_file = f'output.{output_type}'
    scrape_site(url_list, xpaths, output_file, output_type)


if __name__ == '__main__':
    main()