In [None]:
###################################################
#___  ___          _            _       _         #
#|  \/  |         | |          | |     | |        #
#| .  . | __ _ ___| |_ ___   __| | __ _| |_ __ _  #
#| |\/| |/ _` / __| __/ _ \ / _` |/ _` | __/ _` | #
#| |  | | (_| \__ \ || (_) | (_| | (_| | || (_| | #
#\_|  |_/\__,_|___/\__\___/ \__,_|\__,_|\__\__,_| #
################################################### 
# written by Alexander Martin and Marcus Burkhardt #
# Mastodata is divided into data retrieval scripts and
# data loading scripts.
# This script first retrieves data hosted by 'fedidb.org' and
# then analyzes it.
# Fedidb.org itself already has an analytical section.
# Because of that, the analysis here is very basic.
# For further analysis, please visit fedidb.org itself.


#### Retrieve all fediverse instances listed in FediDB and filter for Mastodon domains ######

import requests
import json
import pandas as pd

# Software name used to pick the Mastodon rows out of the full server list.
keep = 'Mastodon'
# Endpoint of the FediDB server listing that is queried page by page.
url = 'https://api.fedidb.org/v1/servers/'

# Page size sent to the API as the 'limit' query parameter.
limit = 40
params = dict(limit=limit)


def query_fedidb(url, params=None, timeout=30):
    """Collect the records of every page of a paginated FediDB listing.

    Starting at ``url``, each response is expected to be a JSON object with
    a ``data`` list and, while more pages exist, a ``links.next`` URL. The
    ``data`` entries of all pages are concatenated in the order received.

    Args:
        url: Address of the first page to request.
        params: Optional mapping of query parameters for the request. It is
            only sent while the URL does not yet contain ``limit=``.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list with the ``data`` entries of all pages that were fetched. If
        a request does not answer with status 200, pagination stops and the
        entries gathered so far are returned.

    Network-level errors raised by ``requests.get`` are not caught here.
    """
    # Work on a private copy so a caller's dict (or a shared default) is
    # never touched.
    query = dict(params) if params else {}
    fediverse = []

    while url:
        # Only attach the parameters when the URL carries no limit yet;
        # presumably the 'next' links returned by the API already include
        # their own query string.
        if 'limit=' not in url:
            response = requests.get(url, params=query, timeout=timeout)
        else:
            response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            # Best effort: keep what has been collected instead of failing
            # on the missing payload.
            print('Request failed with status', response.status_code, 'for', url)
            break

        data_raw = response.json()
        fediverse.extend(data_raw.get('data') or [])

        # A missing or empty 'next' link marks the last page and ends the loop.
        url = (data_raw.get('links') or {}).get('next')

    return fediverse

# Download the complete server list and flatten the nested JSON records into
# a table; nested keys become dotted column names such as 'software.name'.
fediverse = query_fedidb(url, params=params)
fediverse_df = pd.json_normalize(fediverse)
### Rename categories to counter problems due to naming
fediverse_df.rename(
    columns={
        'location.city': 'location_city',
        'location.country': 'location_country',
        'software.id': 'software_id',
        'software.name': 'software_name',
        'software.url': 'software_url',
        'software.version': 'software_version',
        'stats.status_count': 'status_count',
        'stats.user_count': 'user_count',
        'stats.monthly_active_users': 'monthly_active_users',
    },
    inplace=True,
)
# Keep only the Mastodon servers. The explicit copy makes mastodon_df an
# independent frame, so later modifications of it neither affect fediverse_df
# nor trigger pandas' SettingWithCopyWarning.
mastodon_df = fediverse_df[fediverse_df['software_name'] == keep].copy()


In [None]:
# Show the categories (column names) available in the full fediverse table
fediverse_df.columns

In [None]:
# Normalise the column names of the Mastodon subset. The stats columns are
# mapped to 'status_count', 'user_count' and 'monthly_active_users', which are
# the names the analysis cells read. Columns that are already renamed are left
# untouched, because rename ignores keys that are not present.
# The result is assigned back instead of renaming in place: mastodon_df is
# derived by filtering fediverse_df, and in-place changes on such a selection
# can raise pandas' SettingWithCopyWarning.
mastodon_df = mastodon_df.rename(
    columns={
        'location.city': 'location_city',
        'location.country': 'location_country',
        'software.id': 'software_id',
        'software.name': 'software_name',
        'software.url': 'software_url',
        'software.version': 'software_version',
        'stats.status_count': 'status_count',
        'stats.user_count': 'user_count',
        'stats.monthly_active_users': 'monthly_active_users',
    }
)

## Analysis

Quantitative information on users, monthly active users and statuses:
    1.1 Average users
    1.2 Average (monthly) active users
    1.3 Average toots
    1.4 Average toots per user
    1.5 Average toots per monthly active user

For additional graphs, etc., please visit 'fedidb.org'.


In [None]:
# Summary statistics for the Mastodon data frame.
# Every "average per instance" is a column total divided by the number of
# Mastodon rows; the per-user figures divide one column total by another.
instance_count = len(mastodon_df)

print('Fediverse:', len(fediverse_df))
print('Mastodon:', instance_count)
print('Mastodon stats from FediDB')

total_users = mastodon_df['user_count'].sum()
print('Average users on Mastodon per Instance:', total_users / instance_count)

total_active_users = mastodon_df['monthly_active_users'].sum()
print('Average of (monthly) active users on Mastodon per Instance:', total_active_users / instance_count)

total_statuses = mastodon_df['status_count'].sum()
print('Average toots per Instance:', total_statuses / instance_count)
print('Average toots per User:', total_statuses / total_users)
print('Average toots per monthly active user (MAU):', total_statuses / total_active_users)

# Summing the open_registration column counts the rows flagged as open.
open_instances = mastodon_df['open_registration'].sum()
print('Instances with open registration:', open_instances)

In [None]:
# If you want to export one of the data frames as a CSV file, uncomment the corresponding command below.
# Exports are saved in the folder from which this script is executed.

# Uncomment below, to receive a csv-file from 'fediverse_df' containing information on EVERY registered service on fedidb.org
# Remember to enter the correct path before the filename.
#fediverse_df.to_csv('fediverse_df.csv')

# Uncomment below, to receive a csv-file from 'mastodon_df', containing information ONLY on mastodon instances 
#mastodon_df.to_csv('mastodon_df.csv')

In [None]:
# Top 50 Mastodon instances ranked by post count (highest first), shown with
# a selection of categories.
(
    mastodon_df[
        [
            'id',
            'domain',
            'status_count',
            'user_count',
            'monthly_active_users',
            'location_country',
            'open_registration',
        ]
    ]
    .sort_values(by='status_count', ascending=False)
    .head(50)
)