#! /usr/bin/env python3
"""Tally who posted to the AfNOG mailing list in a given year (or all years).

For every month of the selected year(s) the script downloads that month's
``author.html`` index from the list archive, collects the author names it
finds, prints a short summary to the console and writes the unique names
to a text file and a CSV file in the current directory.
"""
import csv
import re
import sys
import time
import urllib
import urllib.request

months = ['January', 'February', 'March', 'April',
          'May', 'June', 'July', 'August', 'September',
          'October', 'November', 'December']
years = ['2012', '2013', '2014', '2015', '2016', '2017', '2018']
urlbody = "https://afnog.org/pipermail/afnog/"
generated = 'This archive was generated by'

# Unique author names seen so far; filled in by find_names().
nameset = set()
# Number of mails found per (year, month).  Keying on the month alone would
# let each year overwrite the previous one when 'all' is selected.
count_instances = dict()

# Timestamp used as a prefix for the output file names.
ts = time.strftime("%Y%m%d-%H%M-%S")

# Captures the text that follows an opening <i> up to the end of that line.
# NOTE(review): the filter this replaces tested ``'' in match``, which is
# always true and therefore rejected every name.  The ``<i>`` / ``</i>``
# markers here are a reconstruction -- confirm against a live author.html.
_ITALIC_OPEN_RE = re.compile(r'<i>(.*)')


def extract_authors(tag_html):
    """Return the author names found in the markup of one ``<i>`` element.

    A captured line is discarded when it still contains the closing
    ``</i>`` or when it contains the archive's "generated by" footer text.
    Surrounding whitespace is stripped and empty results are dropped.

    Args:
        tag_html: The element serialised as a string, e.g. ``str(tag)``.

    Returns:
        A list of names, in the order they appear in ``tag_html``.
    """
    authors = []
    for match in _ITALIC_OPEN_RE.findall(tag_html):
        if '</i>' in match or generated in match:
            continue
        name = match.strip()
        if name:
            authors.append(name)
    return authors


def find_names(url, *args):
    """Download one author index page and record the names on it.

    Every name found is added to the module-level ``nameset``.

    Args:
        url: Address of the page to download.
        *args: Accepted and ignored, so existing positional calls keep
            working.

    Returns:
        The number of author entries found on the page (duplicates
        included).

    Raises:
        OSError: If the page cannot be downloaded
            (``urllib.error.URLError`` and ``HTTPError`` are subclasses).
    """
    # Imported here rather than at the top of the file so that the pure
    # helpers in this module can be loaded without the third-party parser.
    import bs4

    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url) as response:
        page = response.read()

    soup = bs4.BeautifulSoup(page, 'html.parser')
    counter = 0
    for tag in soup('i'):
        for name in extract_authors(str(tag)):
            counter += 1
            nameset.add(name)
    return counter


def resolve_years(choice):
    """Translate the user's answer into the list of years to process.

    Args:
        choice: ``'all'`` or one of the entries of ``years``.

    Returns:
        A new list of year strings, or ``None`` if ``choice`` is not valid.
    """
    if choice == 'all':
        return list(years)
    if choice in years:
        return [choice]
    return None


def format_name_table(names, columns=2):
    """Lay the names out in fixed-width columns.

    Names are sorted so the output is reproducible.  Each name is padded to
    the length of the longest name plus three spaces, and every row is
    preceded by a newline.

    Args:
        names: Iterable of name strings; may be empty.
        columns: Number of names per row.

    Returns:
        The formatted table, or an empty string when there are no names.
    """
    ordered = sorted(names)
    if not ordered:
        # max() on an empty sequence would raise ValueError.
        return ''
    pad = len(max(ordered, key=len)) + 3
    rows = []
    for start in range(0, len(ordered), columns):
        row = ordered[start:start + columns]
        rows.append('\n' + ''.join(name.ljust(pad) for name in row))
    return ''.join(rows)


def main():
    """Prompt for a year, scan the archive and write the report files.

    Returns:
        The process exit status: 0 on success, 1 for an invalid year.
    """
    year = input('Enter the year [2012 - 2018| all]: ').strip().lower()

    selected_years = resolve_years(year)
    if selected_years is None:
        print('Not a valid year.')
        return 1

    names_file_txt = '{}-AfNOG-names-{}.txt'.format(ts, year)
    names_file_csv = '{}-AfNOG-names-{}.csv'.format(ts, year)

    for y in selected_years:
        print()
        sys.stdout.write('Processing {} - '.format(y))
        sys.stdout.flush()

        for month in months:
            url = '{}{}-{}/author.html'.format(urlbody, y, month)
            try:
                count = find_names(url)
            except OSError:
                # Download failed: report it and skip the remaining months
                # of this year.  Only OSError is caught so that Ctrl-C and
                # programming errors are no longer silently swallowed.
                print('\nNo {} detail in {} yet'.format(month, y))
                break
            count_instances[(y, month)] = count
            sys.stdout.write('{} '.format(month[0:3]))
            sys.stdout.flush()

    # Each (year, month) entry is counted exactly once.
    mails_total = sum(count_instances.values())

    print('\nSummary')
    print('-' * len('Summary'))
    print()
    print('{} unique users wrote {} mails'.format(len(nameset), mails_total))
    print()

    title = 'Users names for {}'.format(year)
    with open(names_file_txt, mode='w', encoding='utf-8') as fh_txt:
        fh_txt.write(title + '\n')
        fh_txt.write('-' * len(title) + '\n')
        fh_txt.write(format_name_table(nameset))

    # newline='' is what the csv module expects of the file object; the
    # writer quotes names that themselves contain a comma.
    with open(names_file_csv, mode='w', encoding='utf-8',
              newline='') as fh_csv:
        if nameset:
            csv.writer(fh_csv).writerow(sorted(nameset))

    return 0


if __name__ == '__main__':
    sys.exit(main())