Twitter scraping with snscrape
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner
    !pip install snscrape
    
    
    
    
    !pip install hvplot panel
    
    !pip install wordcloud
    import pandas as pd
    import snscrape.modules.twitter as sntwitter 
    import datetime as dt 
    from wordcloud import WordCloud
    
    import matplotlib.pyplot as plt
    
    import seaborn as sns
    
    import panel as pn
    import hvplot.pandas 
    import hvplot as hv 
    # Initialise Panel (Tabulator widget support) and the Bokeh plotting backend.
    # BUG FIX: the extension is named 'tabulator', not 'tabular', and the sizing
    # mode is 'stretch_width', not 'sketch_width' — the original values are
    # rejected / silently ignored by Panel.
    pn.extension('tabulator', sizing_mode='stretch_width')
    # NOTE(review): `hv` is bound to hvplot (not holoviews) above; recent hvplot
    # versions do expose `extension('bokeh')` — confirm the installed version.
    hv.extension('bokeh')
    # Colour palette used by the Panel layout accents below.
    # BUG FIX: the original list used '.' instead of ',' between 'y' and 'g',
    # which is a SyntaxError.
    PAL = ['r', 'y', 'g']
    # Render a 50px-high horizontal spacer bar tinted with the first palette colour.
    pn.Row(pn.layout.HSpacer(height=50,background=PAL[0]))
    # Collect every tweet carrying the #DataFestAfrica22 hashtag into a DataFrame.
    # NOTE: TwitterHashtagScraper has no built-in limit, so this iterates the
    # hashtag's full history.
    # NOTE(review): tweet.retweetedTweet is a tweet object (or None), not an id,
    # despite the 'retweeted_tweet_id' column name — confirm downstream usage.
    column_names = ['tweet_datetime', 'tweet_id', 'retweeted_tweet_id',
                    'tweet_content', 'username', 'numb_reply',
                    'numb_likes', 'numb_retweet', 'tweet_source']
    rows = [
        [tweet.date, tweet.id, tweet.retweetedTweet, tweet.content,
         tweet.user.username, tweet.replyCount, tweet.likeCount,
         tweet.retweetCount, tweet.source]
        for tweet in sntwitter.TwitterHashtagScraper('DataFestAfrica22').get_items()
    ]
    tweetdf = pd.DataFrame(rows, columns=column_names)
        
    # Add a calendar-date column (time-of-day dropped) for daily grouping.
    tweetdf['tweet_date'] = tweetdf.tweet_datetime.dt.date
    # Quick sanity checks on what was scraped.
    print(tweetdf.shape)
    print(tweetdf.info())

    tweetdf.head()
    # BUG FIX: give the output file a .csv extension and omit the meaningless
    # integer index column (the original wrote an extension-less file with an
    # extra unnamed column).
    tweetdf.to_csv('datafest22_tweeter_data.csv', index=False)
    
    # Daily tweet volume over the collection window.
    daily_counts = tweetdf.tweet_date.value_counts()
    daily_counts.plot();
    plt.xticks(rotation=90);
    
    # Ten most active accounts by tweet count.
    user_counts = tweetdf.username.value_counts()
    user_counts.head(10).plot(kind='bar')

    # Ten accounts with the highest engagement (sorted by likes, then replies).
    engagement = (
        tweetdf.groupby('username')[['numb_likes', 'numb_reply']]
        .sum()
        .sort_values(by=['numb_likes', 'numb_reply'], ascending=False)
    )
    engagement.head(10).plot(kind='bar')

    # Restrict to the ten most prolific tweeters for the per-day breakdown below.
    top10_tweeters = user_counts.head(10).index
    top10_df = tweetdf[tweetdf.username.isin(top10_tweeters)]
    
    # Per-day tweet counts for each of the top-10 tweeters, one panel per user,
    # panels ordered by overall tweet count.
    per_day = (
        top10_df.groupby(['tweet_date', 'username'])
        .count()['tweet_id']
        .reset_index()
    )
    panel_order = tweetdf.username.value_counts()[0:10].index
    grid = sns.relplot(data=per_day, x='tweet_date', y='tweet_id',
                       col='username', col_wrap=2, kind='line',
                       col_order=panel_order)
    grid.set(ylabel='number of tweets ', xlabel='date');
    plt.xticks(rotation=90)
    grid.fig.suptitle('changes in number of tweets with date for top 10 tweeters ', x=.5, y=1);
    
    import re

    # Build a word cloud from the tweet text.
    # BUG FIX: the original stripped only the literal substring 'https', which
    # left '://t.co/...' fragments in the cloud; remove whole URLs instead.
    url_pattern = re.compile(r'https?://\S+')
    wordlist = " ".join(url_pattern.sub('', content) for content in tweetdf.tweet_content)
    word_cloud = WordCloud(collocations=False, background_color='black').generate(wordlist)

    # Display the generated Word Cloud
    plt.imshow(word_cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
    !pip install snscrape
    import os
    import datetime
    import time
    import pandas as pd
    import snscrape.modules.twitter as sntwitter
    import csv
    
    # --- Scrape configuration -------------------------------------------------
    # Hashtag to search for, over the 24-hour window ending today.
    hashtag = "#datafam"
    # FIX: call date.today() once so start/end cannot straddle midnight.
    _today = datetime.date.today()
    start_date = _today - datetime.timedelta(days=1)
    end_date = _today

    # Output CSV name, e.g. "datafam_2023-01-01_2023-01-02.csv".
    # FIX: strip the leading '#' so the file name does not start with '#'
    # (awkward to pass unquoted in shells and URLs).
    output_file = f"{hashtag.lstrip('#')}_{start_date}_{end_date}.csv"

    # snscrape search query: hashtag restricted to [start_date, end_date).
    query = f"{hashtag} since:{start_date} until:{end_date}"
    
    # Scrape every tweet matching the query and stream rows into a CSV file.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "date", "content", "username", "url"])
        for tweet in sntwitter.TwitterSearchScraper(query).get_items():
            writer.writerow([tweet.id, tweet.date, tweet.content,
                             tweet.user.username, tweet.url])

    # BUG FIX: the original ended with time.sleep(86400) under a comment claiming
    # the script would "run again" — but there is no loop, so the process merely
    # slept for 24 hours and then exited. Schedule reruns externally instead
    # (cron / Windows Task Scheduler) rather than blocking here.