Add files via upload
Initial upload of Mastodata
This commit is contained in:
203
analysis_notebook_fedidb.ipynb
Normal file
203
analysis_notebook_fedidb.ipynb
Normal file
@@ -0,0 +1,203 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "506cc9c0-cb5d-4db9-b37d-e487947f9e1b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"###################################################\n",
|
||||
"#___ ___ _ _ _ #\n",
|
||||
"#| \\/ | | | | | | | #\n",
|
||||
"#| . . | __ _ ___| |_ ___ __| | __ _| |_ __ _ #\n",
|
||||
"#| |\\/| |/ _` / __| __/ _ \\ / _` |/ _` | __/ _` | #\n",
|
||||
"#| | | | (_| \\__ \\ || (_) | (_| | (_| | || (_| | #\n",
|
||||
"#\\_| |_/\\__,_|___/\\__\\___/ \\__,_|\\__,_|\\__\\__,_| #\n",
|
||||
"################################################### \n",
|
||||
"# Mastodata is divided into data retrieval scripts and\n",
|
||||
"# data loading scripts.\n",
|
||||
"# This script analyzes data hosted by 'fedidb.org', while\n",
|
||||
"# also retrieving them before the analysis. \n",
|
||||
"# Fedidb.org itself already has an analytical section.\n",
|
||||
"# Because of that, the analysis here is very basic.\n",
|
||||
"# For further analysis please visit fedidb.org itself.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#### Retrieve all fediverse instances listed in fedidb and filter for Mastodon domains ######\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"keep = 'Mastodon'\n",
|
||||
"url = 'https://api.fedidb.org/v1/servers/'\n",
|
||||
"\n",
|
||||
"limit = 40\n",
|
||||
"params = {\n",
|
||||
" 'limit': limit\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def query_fedidb(url, params=dict()):\n",
|
||||
" fediverse = []\n",
|
||||
" more = True\n",
|
||||
" while more:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" if 'limit=' not in url:\n",
|
||||
" response = requests.get(url, params=params)\n",
|
||||
" else:\n",
|
||||
" response = requests.get(url)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" data_raw = response.json()\n",
|
||||
" else:\n",
|
||||
" data_raw = dict()\n",
|
||||
"\n",
|
||||
" #### Follow pagination: replace the request URL with the 'next' link from the response ###\n",
|
||||
" if 'links' in data_raw and 'next' in data_raw['links']:\n",
|
||||
" url = (data_raw['links']['next'])\n",
|
||||
" else:\n",
|
||||
" more = False\n",
|
||||
"\n",
|
||||
" if url is None:\n",
|
||||
" print('NONE')\n",
|
||||
" more = False\n",
|
||||
"\n",
|
||||
" fediverse.extend(data_raw['data'])\n",
|
||||
" return fediverse\n",
|
||||
"\n",
|
||||
"fediverse = query_fedidb(url, params=params)\n",
|
||||
"fediverse_df = pd.json_normalize(fediverse)\n",
|
||||
"### Rename the dotted column names (e.g. 'stats.user_count') to plain names without dots \n",
|
||||
"fediverse_df.rename(columns = {'location.city':'location_city', 'location.country':'location_country',\n",
|
||||
" 'software.id':'software_id', 'software.name':'software_name', 'software.url':'software_url', 'software.version':'software_version',\n",
|
||||
" 'stats.status_count':'status_count', 'stats.user_count':'user_count', 'stats.monthly_active_users':'monthly_active_users'}, inplace=True)\n",
|
||||
"mastodon_df = fediverse_df[fediverse_df['software_name'] == keep]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "263bd2cd-2587-4a40-baff-f5bf1054fd03",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Show categories\n",
|
||||
"fediverse_df.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7486c203-1ef9-452f-b629-abd0889321fe",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mastodon_df.rename(columns = {'location.city':'location_city', 'location.country':'location_country',\n",
|
||||
" 'software.id':'software_id', 'software.name':'software_name', 'software.url':'software_url', 'software.version':'software_version',\n",
|
||||
" 'stats.status_count':'stats_status_count', 'stats.user_count':'stats_user_count', 'stats.monthly_active_users':'stats_monthly_active_users'}, inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6bb4122a-5540-40ea-82ba-d5da33e1ac7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Analysis\n",
|
||||
"\n",
|
||||
"Quantitative information on users, monthly active users and statuses: \n",
|
||||
" 1.1 Average users\n",
|
||||
" 1.2 Average (monthly) active users\n",
|
||||
" 1.3 Average toots\n",
|
||||
" 1.4 Average toots per user\n",
|
||||
" 1.5 Average toots per monthly active user\n",
|
||||
" \n",
|
||||
"For additional graphs etc., please visit 'fedidb.org'.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "69014130-9237-4fb2-96ab-5f1a83371b95",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#print the stats of the entire Mastodon Dataframe\n",
|
||||
"print('Fediverse:', len (fediverse_df))\n",
|
||||
"print('Mastodon:', len(mastodon_df))\n",
|
||||
"print('Mastodon stats from FediDB')\n",
|
||||
"print('Average users on Mastodon per Instance:',mastodon_df['user_count'].sum()/len(mastodon_df))\n",
|
||||
"print('Average of (monthly) active users on Mastodon per Instance:',mastodon_df['monthly_active_users'].sum()/len(mastodon_df))\n",
|
||||
"print('Average toots per Instance:',mastodon_df['status_count'].sum() / len(mastodon_df))\n",
|
||||
"print('Average toots per User:',mastodon_df['status_count'].sum() / mastodon_df['user_count'].sum())\n",
|
||||
"print('Average toots per monthly active user (MAU):',mastodon_df['status_count'].sum() / mastodon_df['monthly_active_users'].sum())\n",
|
||||
"print('Instances with open registration:', mastodon_df['open_registration'].sum())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "03a1c204-16c0-4e79-a5b6-fe4ce82b67b2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If you want to export one of the data frames as a csv, please uncomment the corresponding command below\n",
|
||||
"# Exports are saved in the folder from which this script is executed\n",
|
||||
"\n",
|
||||
"# Uncomment below, to receive a csv-file from 'fediverse_df' containing information on EVERY registered service on fedidb.org\n",
|
||||
"# Remember to enter the correct path before the filename.\n",
|
||||
"#fediverse_df.to_csv('fediverse_df.csv')\n",
|
||||
"\n",
|
||||
"# Uncomment below, to receive a csv-file from 'mastodon_df', containing information ONLY on mastodon instances \n",
|
||||
"#mastodon_df.to_csv('mastodon_df.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b6796ed5-e456-4ee7-a6dd-b19953f7c865",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# List Mastodon instances from highest post count to lowest, with a selection of categories.\n",
|
||||
"mastodon_df[['id', 'domain', 'status_count', 'user_count', 'monthly_active_users', 'location_country', 'open_registration']].sort_values(by = 'status_count', ascending=[False]).head(50)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user