import tweepy
import time
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
from wordcloud import WordCloud, STOPWORDS
import re

# Twitter API credentials.
# Each value is taken from an environment variable when it is set, so real
# keys never have to be written into this file. The placeholder strings are
# kept as fallbacks; they are not valid credentials, so either set the
# variables or replace the placeholders with keys from a Twitter developer
# account before making any API call.
import os

consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "I cannot show my key and secret")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "but to make this snippet of code work")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "you can apply to be a twitter developer")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "you will have your own key and secret")

# OAuth 1a handshake: consumer pair first, then the user access pair
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Shared client used by every request below.
# NOTE(review): wait_on_rate_limit=True presumably makes tweepy pause until
# the rate-limit window resets instead of failing -- confirm in tweepy docs.
api = tweepy.API(auth, wait_on_rate_limit=True)

[nltk_data] Downloading package punkt to /Users/charliez/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# Screen names (handles) of the two Twitter accounts being compared
trump = "realDonaldTrump"
biden = "JoeBiden"

# Number of tweets to request per account
count = 50

# Cursors over each account's timeline, capped at `count` items.
# The values above are handles, so they are passed with the explicit
# `screen_name` keyword, which is the parameter the user_timeline endpoint
# documents for handles (rather than the generic `id` keyword).
biden_tweets = tweepy.Cursor(api.user_timeline, screen_name=biden).items(count)
trump_tweets = tweepy.Cursor(api.user_timeline, screen_name=trump).items(count)

# Keep only the four fields analysed below, one inner list per tweet:
# creation time, text, retweet count, favorite count
biden_tweets_list = [[tweet.created_at, tweet.text, tweet.retweet_count, tweet.favorite_count]
                     for tweet in biden_tweets]
trump_tweets_list = [[tweet.created_at, tweet.text, tweet.retweet_count, tweet.favorite_count]
                     for tweet in trump_tweets]

# One dataframe per account, both with the same column labels
tweets_biden = pd.DataFrame(biden_tweets_list, columns=["time stamp", "tweet", "retweets", "favorites"])
tweets_trump = pd.DataFrame(trump_tweets_list, columns=["time stamp", "tweet", "retweets", "favorites"])


# Preview the first ten rows of Biden's dataframe (displayed as a notebook cell result)
tweets_biden.head(10)


# Report the timestamps in row 0 and row 49 of Biden's dataframe and the
# difference between them (row 0 minus row 49).
print(
    f'Biden: the first tweet was sent on {tweets_biden.loc[0, "time stamp"]}. \n'
    f'the 50th tweet was sent on {tweets_biden.loc[49, "time stamp"]}.\n'
    f'It took {tweets_biden.loc[0, "time stamp"] - tweets_biden.loc[49, "time stamp"]}'
    f' for Biden to send 50 tweets.'
)

Biden: the first tweet was sent on 2020-11-08 14:28:23. 
the 50th tweet was sent on 2020-11-04 19:10:00.
It took 3 days 19:18:23 for Biden to send 50 tweets.


# Report the timestamps in row 0 and row 49 of Trump's dataframe and the
# difference between them (row 0 minus row 49).
print(
    f'Trump: the first tweet was sent on {tweets_trump.loc[0, "time stamp"]}. \n'
    f'the 50th tweet was sent on {tweets_trump.loc[49, "time stamp"]}.\n'
    f'It took {tweets_trump.loc[0, "time stamp"] - tweets_trump.loc[49, "time stamp"]}'
    f' for Trump to send 50 tweets.'
)

Trump: the first tweet was sent on 2020-11-09 04:54:16. 
the 50th tweet was sent on 2020-11-06 16:37:01.
It took 2 days 12:17:15 for Trump to send 50 tweets.


# Average minutes per tweet for Trump over the 50-row window.
# The elapsed time between row 0 and row 49 is converted to seconds, divided
# by the 50 tweets, and then divided by 60 to turn seconds into minutes.
# Dividing (rather than taking the remainder modulo 60) is what gives whole
# minutes; a modulo would only keep the leftover seconds of the average.
trump_elapsed = tweets_trump.loc[0]["time stamp"] - tweets_trump.loc[49]["time stamp"]
print(f'According to the recent data, Trump will send a tweet every {int(trump_elapsed.total_seconds() / 50 / 60)} minutes')

According to the recent data, Trump will send a tweet every 20 minutes


# Average minutes per tweet for Biden over the 50-row window.
# The elapsed time between row 0 and row 49 is converted to seconds, divided
# by the 50 tweets, and then divided by 60 to turn seconds into minutes.
# Dividing (rather than taking the remainder modulo 60) is what gives whole
# minutes; a modulo would only keep the leftover seconds of the average.
biden_elapsed = tweets_biden.loc[0]["time stamp"] - tweets_biden.loc[49]["time stamp"]
print(f'According to the recent data, Biden will send a tweet every {int(biden_elapsed.total_seconds() / 50 / 60)} minutes')

According to the recent data, Biden will send a tweet every 34 minutes


# Favorites of every tweet for both accounts on one chart.
# x is the dataframe row index, y the `favorites` column.
favorites_series = (
    (tweets_trump, 'r', 'o', 'Trump'),
    (tweets_biden, 'b', 's', 'Biden'),
)
for frame, colour, shape, who in favorites_series:
    plt.plot(frame.index, frame.favorites, c=colour, marker=shape, label=who)

plt.legend(loc='upper right')
plt.title('Number of Favorites of Each Tweet')
plt.xlabel('tweets')
plt.ylabel('millions')
plt.show()


    print(f'Biden has {sum(tweets_biden.favorites)} favorites in total and {int(sum(tweets_biden.favorites)/50)} favorites per tweet. \nTrump has {sum(tweets_trump.favorites)} favorites in total and {int(sum(tweets_trump.favorites)/50)} favorites per tweet.')

Biden has 23764008 favorites in total and 475280 favorites per tweet. 
Trump has 10064734 favorites in total and 201294 favorites per tweet.


# Retweets of every tweet for both accounts on one chart.
# x is the dataframe row index, y the `retweets` column.
retweets_series = (
    (tweets_trump, 'r', 'o', 'Trump'),
    (tweets_biden, 'b', 's', 'Biden'),
)
for frame, colour, shape, who in retweets_series:
    plt.plot(frame.index, frame.retweets, c=colour, marker=shape, label=who)

plt.legend(loc='upper right')
plt.title('Number of Retweets of Each Tweet')
plt.xlabel('tweets')
plt.show()


# Total retweets per account and the per-tweet figure (total / 50, truncated
# to an integer).
print(
    f'Biden has {sum(tweets_biden.retweets)} retweets in total'
    f' and {int(sum(tweets_biden.retweets)/50)} retweets per tweet. \n'
    f'Trump has {sum(tweets_trump.retweets)} retweets in total'
    f' and {int(sum(tweets_trump.retweets)/50)} retweets per tweet.'
)

Biden has 2874090 retweets in total and 57481 retweets per tweet. 
Trump has 2275648 retweets in total and 45512 retweets per tweet.


def words(df):
    """Draw a word cloud from the tweets in *df*.

    Every value in ``df.tweet`` is converted to ``str``, has ``http...``
    links removed, is tokenized with ``word_tokenize`` and lower-cased. The
    tokens of all tweets are combined into one space-separated corpus, which
    is rendered as an 800x800 word cloud on a white background (using the
    ``STOPWORDS`` set as stop words) and shown in an 8x8 matplotlib figure.

    Args:
        df: Object whose ``tweet`` attribute iterates over the tweet texts,
            such as the ``tweets_biden`` / ``tweets_trump`` dataframes.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    stopwords = set(STOPWORDS)
    # Compiled once here instead of re-resolving the pattern for every tweet
    url_pattern = re.compile(r"http\S+")

    # One "tok tok tok " chunk per tweet; the chunks are joined in a single
    # pass at the end rather than growing a string with += inside the loop.
    chunks = []
    for text in df.tweet:
        # remove url links in tweets
        cleaned = url_pattern.sub("", str(text))
        # split into lower-cased tokens
        tokens = [word.lower() for word in word_tokenize(cleaned)]
        chunks.append(" ".join(tokens) + " ")
    corpus = "".join(chunks)

    wordcloud = WordCloud(width=800, height=800,
                          background_color="white",
                          stopwords=stopwords,
                          min_font_size=10).generate(corpus)

    # plot the WordCloud image
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad=0)

    plt.show()


# Word cloud built from the text of Trump's collected tweets
words(tweets_trump)


# Word cloud built from the text of Biden's collected tweets
words(tweets_biden)

	time stamp	tweet	retweets	favorites
0	2020-11-08 14:28:23	RT @Transition46: We stand together as one Ame...	15841	0
1	2020-11-08 02:20:00	From the bottom of my heart: thank you. https:...	73176	698724
2	2020-11-08 02:10:00	A nation united.\n\nA nation strengthened.\n\n...	66984	881406
3	2020-11-08 02:08:00	With full hearts and steady hands, with faith ...	40663	607572
4	2020-11-08 02:05:00	Tonight, the whole world is watching America. ...	23096	335244
5	2020-11-08 02:02:00	Especially for those moments when this campaig...	35591	469339
6	2020-11-08 01:59:00	I believe that this is part of the mandate fro...	8035	140258
7	2020-11-08 01:56:00	Now that the campaign is over—what is the peop...	16375	240432
8	2020-11-08 01:53:00	We cannot repair the economy, restore our vita...	20030	284120
9	2020-11-08 01:51:00	The Bible tells us that to everything there is...	36932	353445

-- by Charlie Chengrui Zheng 11/09/2020¶

Twitter Scraping¶

Analysis on Biden and Trump's tweets¶

Frequency¶

Retweets and Favorites¶

Word Cloud¶

Conclusion¶

Social Media Analytics on Trump and Biden's Twitter¶

-- by Charlie Chengrui Zheng 11/09/2020¶

Twitter Scraping¶

Analysis on Biden and Trump's tweets¶

Frequency¶

Retweets and Favorites¶

Word Cloud¶

Conclusion¶