Step 0: Introduction

In this article, we will work with Twitter, which is quite popular and has a lot of users. By using the Twitter API and writing our code in the Python language, we will collect data from Twitter and try to process this data according to our story. Let’s tell the story and go to the implementation section 🙌🏽

Step 1: Collect data from the Twitter API

We will use the Twitter API (Application Programming Interface) to get data from Twitter. First of all, we need to go to the new-app creation page and get our own key and secret values. When we connect to Twitter through the API, we will use these values.

import requests
from requests_oauthlib import OAuth1
import oauth2
import pymongo
from pymongo import MongoClient

# Twitter API credentials -- replace with your own app's values
# (https://developer.twitter.com, app creation page).
CONSUMER_KEY = "XXX"
CONSUMER_SECRET = "XXX"
ACCESS_TOKEN = "XXX"
ACCESS_TOKEN_SECRET = "XXX"

# OAuth1 object used by the `requests` library calls.
auth = OAuth1(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# oauth2 client used for the endpoints queried via `client.request(...)`.
consumer = oauth2.Consumer(key=CONSUMER_KEY, secret=CONSUMER_SECRET)
access_token = oauth2.Token(key=ACCESS_TOKEN, secret=ACCESS_TOKEN_SECRET)
client = oauth2.Client(consumer, access_token)

# Create a connection with MongoClient to a local MongoDB instance.
mongo_client = MongoClient('localhost', 27017)
db = mongo_client['new_db']          # create a database
new_collection = db.new_collection   # create a collection
import json
import time

QUERY = 'openmaker'

ENDPOINTS = {
    'followers': 'https://api.twitter.com/1.1/followers/ids.json',
}

# Collection that stores one raw API response page per document.
followers_collection = db.followers_collection


def get_followers(username, cursor=-1, nested_count=0):
    """Collect all follower ids of `username`, paging with the API cursor.

    Each page is stored raw in `followers_collection`; the flat list of
    ids gathered so far is returned. Recurses once per page and stops
    after 15 calls to respect the 15-requests-per-15-minutes rate limit.
    """
    if nested_count > 14:  # rate limit: max 15 requests in 15 mins
        return []
    params = {
        'screen_name': username,
        'cursor': cursor,
    }
    response = requests.get(ENDPOINTS['followers'], auth=auth, params=params)

    data = response.json()
    followers_collection.insert_one(data)  # keep the raw page in MongoDB

    # A next_cursor of 0 marks the last page -- stop instead of issuing
    # another request against the rate limit.
    if data['next_cursor'] == 0:
        return data['ids']
    return data['ids'] + get_followers(username, data['next_cursor'],
                                       nested_count + 1)
if __name__ == '__main__':
    # Wait out the 15-minute rate-limit window (plus one minute of
    # slack) before unwinding the stored pages into individual ids.
    then = time.time()
    diff = 0
    while diff < (16 * 60):
        diff = time.time() - then  # in seconds
        sleep_time = 16 * 60 - diff
        if sleep_time > 0:
            time.sleep(sleep_time)

    # Target collection: one document per follower id.
    followers_ids_collection = db.followers_ids_collection

    # $unwind fans each stored page out into one document per id in
    # its `ids` array.
    ids = followers_collection.aggregate([{'$unwind': '$ids'}])
    count = 0
    for doc in ids:  # avoid shadowing the builtin `id`
        count += 1
        # Drop the source page's _id so every insert gets a fresh one
        # (re-inserting the same _id would raise DuplicateKeyError).
        del doc['_id']
        followers_ids_collection.insert_one(doc)
def get_profile(a):
    """Fetch full user profiles for stored follower ids, 100 per request.

    Processes pages `a` .. `a + 299` of `followers_ids_collection`
    (rate limit: max 300 requests per window). users/lookup accepts up
    to 100 user ids as a comma-separated list, so each call resolves
    one page of 100 ids. Profiles are stored in
    `followers_profile_collection`.
    """
    # range(a, a + 300) issues exactly 300 requests; the original
    # a + 299 skipped one page per batch because the driver strides
    # by 300 (0*300, 1*300, ...).
    for i in range(a, a + 300):
        # One page of up to 100 follower ids.
        userid = followers_ids_collection.find().skip(i * 100).limit(100)

        # user_ids must be a comma-separated list.
        str_user = ','.join([str(int_id['ids']) for int_id in userid])

        request = '{url}{user_id}'.format(
            url='https://api.twitter.com/1.1/users/lookup.json?user_id=',
            user_id=str_user)
        response, data = client.request(request)
        profile = json.loads(data)
        for j in profile:
            followers_profile_collection.insert_one(j)

if __name__ == '__main__':
    # Each batch covers 300 requests x 100 users = 30 000 profiles.
    # Continue with get_profile(1 * 300), get_profile(2 * 300), ...
    # respectively for the next batches.
    get_profile(0 * 300)
def get_friends():
    """Collect friend ids for a slice of the stored follower profiles.

    For each profile document in positions [147:228) of
    `followers_profile_collection`, pages through friends/ids and stores
    the complete friend list as one comma-separated string per user in
    `friend_of_users_collection`.

    NOTE(review): `followers_profile_collection` and
    `friend_of_users_collection` must be bound (e.g. to db.<name>)
    before calling -- they are not defined in this excerpt.
    """
    for profile_doc in followers_profile_collection.find()[147:228]:
        next_cursor = -1
        friends = []
        userid = profile_doc['id']

        while True:
            url = ('https://api.twitter.com/1.1/friends/ids.json?cursor='
                   + str(next_cursor) + '&user_id=' + str(userid))
            try:
                response, data = client.request(url)
            except TimeoutError:
                print('TimeoutError, waiting 5 seconds to retry...')
                time.sleep(5)
                continue  # retry: `response`/`data` are unbound here
            except Exception as e:
                print('Some other exception happened. ', e)
                print('Waiting 30 seconds to retry...')
                time.sleep(30)
                continue  # retry: `response`/`data` are unbound here

            if response.status == 200:
                parsed_data = json.loads(data)
                friends = friends + list(parsed_data['ids'])
                next_cursor = parsed_data.get('next_cursor')
                if next_cursor == 0:  # cursor 0 marks the last page
                    break
            elif response.status == 429:
                # Rate limited: sleep until the window resets (the
                # reset time comes back as a response header).
                wait = float(response['x-rate-limit-reset']) - time.time()
                print(wait)
                time.sleep(max(wait, 0))
            elif 400 <= response.status < 500:
                # Protected/deleted accounts etc. -- skip this user.
                print('User %s is skipped because of status %d'
                      % (str(userid), response.status))
                break
            else:
                print('Got status: %d trying again...' % response.status)
                time.sleep(3)
                continue

        friend_string = ','.join([str(friend) for friend in friends])

        d = {"user_id": userid, "friend_ids": friend_string}

        friend_of_users_collection.insert_one(d)


if __name__ == '__main__':
    get_friends()

Step 2: Processing data to analyze relationships between Twitter accounts

To process our data we first extract it from MongoDB.

import pandas as pd

# Pull every {user_id, friend_ids} document out of MongoDB.
total_data = pd.DataFrame(list(friend_of_users_collection.find()))
# NOTE(review): column names assumed to match the keys written by
# get_friends() -- confirm against the collection's documents.
total_data_user_ids = total_data['user_id']
total_data_friend_ids = total_data['friend_ids']

# user-friend list: {user_id: [friend ids that are themselves in our
# collected user set]} -- i.e. edges of the graph restricted to the
# users we crawled.
number_of_user_ids = len(total_data_user_ids)
known_users = set(total_data_user_ids)  # hoisted: O(1) membership tests
data_list = {}
for idx in range(number_of_user_ids):
    friend_str = str(total_data_friend_ids[idx])
    if friend_str != '':
        friend_ids = map(int, friend_str.split(','))
        mutual = set(friend_ids).intersection(known_users)
        if mutual:
            data_list[total_data_user_ids[idx]] = list(mutual)
source-target data
source-target relation

Ph.D. Cand. in CmpE @Boğaziçi University. #ai #privacy #uncertainty #ml #dl #running #cycling #she/her https://www.cmpe.boun.edu.tr/~gonul.ayci/

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store