{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "506cc9c0-cb5d-4db9-b37d-e487947f9e1b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "###################################################\n",
    "#___  ___          _            _       _         #\n",
    "#|  \\/  |         | |          | |     | |        #\n",
    "#| .  . | __ _ ___| |_ ___   __| | __ _| |_ __ _  #\n",
    "#| |\\/| |/ _` / __| __/ _ \\ / _` |/ _` | __/ _` | #\n",
    "#| |  | | (_| \\__ \\ || (_) | (_| | (_| | || (_| | #\n",
    "#\\_|  |_/\\__,_|___/\\__\\___/ \\__,_|\\__,_|\\__\\__,_| #\n",
    "###################################################\n",
    "# written by Alexander Martin and Marcus Burkhardt #\n",
    "# Mastodata is divided into data retrieval scripts and\n",
    "# data loading scripts.\n",
    "# This script retrieves the data hosted by 'fedidb.org'\n",
    "# and then analyzes them.\n",
    "# Fedidb.org itself already has an analytical section.\n",
    "# Because of that, the analysis here is very basic.\n",
    "# For further analysis please visit fedidb.org itself.\n",
    "\n",
    "\n",
    "#### Retrieve all fediverse instances listed in FediDB and filter for Mastodon domains ######\n",
    "\n",
    "import requests\n",
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "keep = 'Mastodon'\n",
    "url = 'https://api.fedidb.org/v1/servers/'\n",
    "\n",
    "limit = 40\n",
    "params = {\n",
    "    'limit': limit\n",
    "}\n",
    "\n",
    "# Dotted column names (as produced by `pd.json_normalize`) mapped to the\n",
    "# underscore names that every cell below uses.\n",
    "column_names = {\n",
    "    'location.city': 'location_city',\n",
    "    'location.country': 'location_country',\n",
    "    'software.id': 'software_id',\n",
    "    'software.name': 'software_name',\n",
    "    'software.url': 'software_url',\n",
    "    'software.version': 'software_version',\n",
    "    'stats.status_count': 'status_count',\n",
    "    'stats.user_count': 'user_count',\n",
    "    'stats.monthly_active_users': 'monthly_active_users',\n",
    "}\n",
    "\n",
    "\n",
    "def query_fedidb(url, params=None, timeout=30):\n",
    "    \"\"\"Collect the records of all pages of a paginated FediDB endpoint.\n",
    "\n",
    "    Starting at `url`, the 'data' list of every JSON response is appended to\n",
    "    the result, and the address found under 'links' -> 'next' is requested\n",
    "    next. The loop ends when a response announces no further page or when a\n",
    "    request is not answered with HTTP status 200.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url : str\n",
    "        Address of the first page to request.\n",
    "    params : dict, optional\n",
    "        Query parameters. They are only sent while `url` does not already\n",
    "        contain 'limit=' (presumably the case for the 'next' links, which\n",
    "        are requested unchanged).\n",
    "    timeout : float, optional\n",
    "        Seconds to wait for a response before `requests` gives up.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        The 'data' entries of all pages retrieved. After a failed request a\n",
    "        warning is printed and the entries collected up to that point are\n",
    "        returned.\n",
    "    \"\"\"\n",
    "    fediverse = []\n",
    "    while url:\n",
    "        # Only add the query parameters if the URL carries no 'limit=' of its own.\n",
    "        request_params = None if 'limit=' in url else params\n",
    "        response = requests.get(url, params=request_params, timeout=timeout)\n",
    "\n",
    "        if response.status_code != 200:\n",
    "            # Best effort: keep what was collected instead of failing on a missing 'data' key.\n",
    "            print('Request to', url, 'failed with HTTP status', response.status_code,\n",
    "                  '- stopping after', len(fediverse), 'records.')\n",
    "            break\n",
    "\n",
    "        data_raw = response.json()\n",
    "        fediverse.extend(data_raw.get('data') or [])\n",
    "\n",
    "        # Overwrite the request link with the link to the next page;\n",
    "        # a missing or empty 'next' link ends the loop.\n",
    "        url = (data_raw.get('links') or {}).get('next')\n",
    "    return fediverse\n",
    "\n",
    "\n",
    "fediverse = query_fedidb(url, params=params)\n",
    "if not fediverse:\n",
    "    raise RuntimeError('No records could be retrieved from ' + url)\n",
    "\n",
    "### Rename categories to counter problems due to naming\n",
    "fediverse_df = pd.json_normalize(fediverse).rename(columns=column_names)\n",
    "# `.copy()` makes `mastodon_df` an independent frame rather than a slice of `fediverse_df`.\n",
    "mastodon_df = fediverse_df[fediverse_df['software_name'] == keep].copy()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "263bd2cd-2587-4a40-baff-f5bf1054fd03",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Show categories\n",
    "fediverse_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7486c203-1ef9-452f-b629-abd0889321fe",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# `mastodon_df` inherits the renamed columns from `fediverse_df`, so it needs no\n",
    "# second rename. Instead, check that every column used by the cells below exists.\n",
    "required_columns = ['id', 'domain', 'status_count', 'user_count', 'monthly_active_users',\n",
    "                    'location_country', 'open_registration']\n",
    "missing_columns = [column for column in required_columns if column not in mastodon_df.columns]\n",
    "print('Missing columns:', missing_columns if missing_columns else 'none')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bb4122a-5540-40ea-82ba-d5da33e1ac7f",
   "metadata": {},
   "source": [
    "## Analysis\n",
    "\n",
    "Quantitative information on users, monthly active users and statuses:\n",
    "\n",
    "- 1.1 Average users\n",
    "- 1.2 Average (monthly) active users\n",
    "- 1.3 Average toots\n",
    "- 1.4 Average toots per user\n",
    "- 1.5 Average toots per monthly active user\n",
    "\n",
    "For additional graphs, etc., please visit 'fedidb.org'.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69014130-9237-4fb2-96ab-5f1a83371b95",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Print the stats of the entire Mastodon dataframe\n",
    "instance_count = len(mastodon_df)\n",
    "total_users = mastodon_df['user_count'].sum()\n",
    "total_active_users = mastodon_df['monthly_active_users'].sum()\n",
    "total_statuses = mastodon_df['status_count'].sum()\n",
    "\n",
    "print('Fediverse:', len(fediverse_df))\n",
    "print('Mastodon:', instance_count)\n",
    "print('Mastodon stats from FediDB')\n",
    "print('Average users on Mastodon per Instance:', total_users / instance_count)\n",
    "print('Average of (monthly) active users on Mastodon per Instance:', total_active_users / instance_count)\n",
    "print('Average toots per Instance:', total_statuses / instance_count)\n",
    "print('Average toots per User:', total_statuses / total_users)\n",
    "print('Average toots per monthly active user (MAU):', total_statuses / total_active_users)\n",
    "print('Instances with open registration:', mastodon_df['open_registration'].sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03a1c204-16c0-4e79-a5b6-fe4ce82b67b2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Set a switch below to True to export the corresponding data frame as a csv file.\n",
    "# A bare file name is written to the folder this notebook is executed in;\n",
    "# put a path in front of the file name to save the export somewhere else.\n",
    "\n",
    "# 'fediverse_df' contains information on EVERY service registered on fedidb.org\n",
    "export_fediverse_df = False\n",
    "# 'mastodon_df' contains information ONLY on Mastodon instances\n",
    "export_mastodon_df = False\n",
    "\n",
    "if export_fediverse_df:\n",
    "    fediverse_df.to_csv('fediverse_df.csv')\n",
    "if export_mastodon_df:\n",
    "    mastodon_df.to_csv('mastodon_df.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6796ed5-e456-4ee7-a6dd-b19953f7c865",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# List Mastodon instances from highest post count to lowest, with a selection of categories.\n",
    "overview_columns = ['id', 'domain', 'status_count', 'user_count', 'monthly_active_users',\n",
    "                    'location_country', 'open_registration']\n",
    "mastodon_df[overview_columns].sort_values(by='status_count', ascending=False).head(50)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}