FediBubble/FediBubble.py
2024-01-03 19:30:25 +01:00

121 lines
3.7 KiB
Python

import json
import os
import sys

import pandas as pd
import plotly.express as px
import requests
# user="marcelcosta"
# user="xaloc"
# domain="bcn.fedi.cat"
# domain="social.anartist.org"
# domain="fedi.xaloc.space"
# domain="video.anartist.org"
# Command line: <script> <user> <domain>
if len(sys.argv) < 3:
    sys.exit("Usage: " + sys.argv[0] + " <user> <domain>")
user = sys.argv[1]
domain = sys.argv[2]

# Seconds to wait for any single HTTP response before giving up, so an
# unresponsive server cannot hang the script forever.
REQUEST_TIMEOUT = 10

# Read the instance's NodeInfo document to learn which server software it
# runs; the pagination loop uses this to pick how it parses account entries.
instance_link = requests.get(
    "https://" + domain + "/.well-known/nodeinfo", timeout=REQUEST_TIMEOUT
).json()['links'][0]['href']
instance_software = requests.get(
    instance_link, timeout=REQUEST_TIMEOUT
).json()['software']['name']

# Resolve the account. The lookup endpoint is tried first; if it does not
# answer 200, fall back to fetching the account directly by name.
response = requests.get(
    "https://" + domain + "/api/v1/accounts/lookup",
    params={"acct": user},  # let requests URL-encode the account name
    timeout=REQUEST_TIMEOUT,
)
if response.status_code != 200:
    response = requests.get(
        "https://" + domain + "/api/v1/accounts/" + user,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail with a clear HTTP error instead of a KeyError on the error body.
    response.raise_for_status()
account = response.json()
account_id = account['id']

# First page of the "following" listing; later pages come from Link headers.
url = "https://" + domain + "/api/v1/accounts/" + str(account_id) + "/following"

# Loop state: last HTTP status and last set of pagination links seen.
status = 200
lenlinks = ["next"]

# Parallel result lists: account handle, its node, and that node's software.
accts = []
nodes = []
soft = []

# Cache of node -> software name, so each node is queried at most once.
nodes_d = {}

# The following-count field is looked up under both spellings (presumably
# different server software names it differently). Fall back to 1 when no
# positive count is reported, so the progress bar never divides by zero and
# an account that follows nobody no longer raises IndexError.
count_names = ['following_count', 'followingCount']
counts = next(
    (account[name] for name in count_names if (account.get(name) or 0) > 0),
    1,
)

# Number of accounts processed so far (incremented before each report).
i = 0
def progressBar(count_value, total, suffix=''):
    """Redraw a 100-character text progress bar on the current stdout line.

    Args:
        count_value: Number of items processed so far.
        total: Number of items expected. When it is zero or negative the
            bar is drawn as complete instead of dividing by zero.
        suffix: Optional text appended after the percentage.

    The filled portion and the percentage are clamped to the 0-100% range,
    so a ``count_value`` larger than ``total`` shows 100% rather than more.
    The line ends with a carriage return (no newline) so the next call
    overwrites it.
    """
    bar_length = 100
    if total > 0:
        filled_up_Length = int(round(bar_length * count_value / float(total)))
        percentage = round(100.0 * count_value / float(total), 1)
    else:
        # Nothing to wait for: show a full bar.
        filled_up_Length = bar_length
        percentage = 100.0
    filled_up_Length = min(max(filled_up_Length, 0), bar_length)
    percentage = min(max(percentage, 0.0), 100.0)
    bar = '=' * filled_up_Length + '-' * (bar_length - filled_up_Length)
    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percentage, '%', suffix))
    sys.stdout.flush()
def _node_software(node, cache):
    """Return the software name *node* reports via NodeInfo, using *cache*.

    The result (or "Unknown" when the lookup fails for any reason) is stored
    in *cache* so each node is contacted at most once.
    """
    if node not in cache:
        try:
            d_link = requests.get(
                "https://" + node + "/.well-known/nodeinfo", timeout=10
            ).json()['links'][0]['href']
            cache[node] = requests.get(d_link, timeout=10).json()['software']['name']
        except Exception:
            # Best effort: unreachable or non-conforming nodes are recorded
            # as "Unknown". `Exception` (not a bare except) keeps Ctrl-C
            # working during a long crawl.
            cache[node] = "Unknown"
    return cache[node]


# Walk the paginated "following" listing, one page per iteration, until a
# request fails or the server stops advertising a "next" page.
while status == 200 and 'next' in lenlinks:
    response = requests.get(url, timeout=10)
    status = response.status_code
    if status != 200:
        # An error body is not a list of accounts; keep what was collected.
        sys.stderr.write(
            "\nRequest to %s failed with HTTP status %s; stopping.\n" % (url, status)
        )
        break

    for d in response.json():
        accts.append(d['acct'])
        if instance_software == "akkoma":
            # These responses embed instance metadata in each account entry,
            # so no extra request per node is needed.
            try:
                nodes.append(d['akkoma']['instance']['name'])
            except (KeyError, TypeError):
                nodes.append("Unknown")
            try:
                soft.append(d['akkoma']['instance']['nodeinfo']['software']['name'])
            except (KeyError, TypeError):
                soft.append("Unknown")
        else:
            # An `acct` without "@" has no host part; treat the account as
            # local to the instance being queried.
            acct_parts = d['acct'].split("@")
            nodes.append(acct_parts[1] if len(acct_parts) > 1 else domain)
            soft.append(_node_software(nodes[-1], nodes_d))
        i = i + 1
        progressBar(i, counts)

    # Follow the Link header to the next page, if the server sent one.
    if 'next' in response.links:
        url = response.links['next']['url']
    lenlinks = response.links
# The CSV files below are written into "outs/", so the directory has to
# exist before the first write (no error if it is already there).
os.makedirs("outs", exist_ok=True)

# One row per followed account.
df = pd.DataFrame({'Account': accts, 'Node': nodes, 'Software': soft})

# Number of followed accounts per node, largest first.
node_sizes = df.groupby('Node').size()
df_node = pd.DataFrame(
    {'Node': node_sizes.index, 'Size': node_sizes.values}
).sort_values('Size', ascending=False)

# Number of followed accounts per server software, largest first.
soft_sizes = df.groupby('Software').size()
df_soft = pd.DataFrame(
    {'Software': soft_sizes.index, 'Size': soft_sizes.values}
).sort_values('Size', ascending=False)

# All output files share the "outs/<user>_<domain>" prefix.
out_prefix = "outs/" + user + "_" + domain
df.to_csv(out_prefix + "_df.csv", index=False)
df_node.to_csv(out_prefix + "_df_node.csv", index=False)
df_soft.to_csv(out_prefix + "_df_soft.csv", index=False)
def _save_pie(frame, column, title, path, keep=10):
    """Write a pie chart of *frame*'s 'Size' values, labelled by *column*.

    Rows beyond the first *keep* are relabelled "Other" in place (so *frame*
    is modified), then the chart is written to *path* as an image. Callers
    are expected to pass a frame already sorted by 'Size', largest first, so
    that the rows kept are the biggest groups.

    Returns the plotly figure that was written.
    """
    if len(frame) > keep:
        tail_labels = frame[column].iloc[keep:]
        frame.loc[frame[column].isin(tail_labels), column] = "Other"
    figure = px.pie(frame, values='Size', names=column, title=title)
    figure.write_image(path)
    return figure


# Make sure the output directory exists (no error if it already does).
os.makedirs("outs", exist_ok=True)

fig = _save_pie(
    df_node, 'Node', 'Node Distribution',
    "outs/" + user + "_" + domain + "_Nodes.png",
)
fig = _save_pie(
    df_soft, 'Software', 'Software Distribution',
    "outs/" + user + "_" + domain + "_Soft.png",
)