Twitter scraping with snscrape
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner
    !pip install snscrape
    
    
    
    
    !pip install hvplot panel
    
    !pip install wordcloud
    import pandas as pd
    import snscrape.modules.twitter as sntwitter 
    import datetime as dt 
    from wordcloud import WordCloud
    
    import matplotlib.pyplot as plt
    
    import seaborn as sns
    
    import panel as pn
    import hvplot.pandas 
    import hvplot as hv 
    # Initialise Panel (Tabulator widget support) and the Bokeh plotting backend.
    # BUG FIX: the extension is named 'tabulator', not 'tabular', and the sizing
    # mode is 'stretch_width', not 'sketch_width' — the original values are
    # rejected / silently ignored by Panel.
    pn.extension('tabulator', sizing_mode='stretch_width')
    # NOTE(review): `hv` is bound to hvplot (not holoviews) above; recent hvplot
    # versions do expose `extension('bokeh')` — confirm the installed version.
    hv.extension('bokeh')
    # Colour palette used by the Panel layout accents below.
    # BUG FIX: the original list used '.' instead of ',' between 'y' and 'g',
    # which is a SyntaxError.
    PAL = ['r', 'y', 'g']
    # Render a 50px-high horizontal spacer bar tinted with the first palette colour.
    pn.Row(pn.layout.HSpacer(height=50,background=PAL[0]))
    # Collect every tweet carrying the #DataFestAfrica22 hashtag into a DataFrame.
    # NOTE: TwitterHashtagScraper has no built-in limit, so this iterates the
    # hashtag's full history.
    # NOTE(review): tweet.retweetedTweet is a tweet object (or None), not an id,
    # despite the 'retweeted_tweet_id' column name — confirm downstream usage.
    column_names = ['tweet_datetime', 'tweet_id', 'retweeted_tweet_id',
                    'tweet_content', 'username', 'numb_reply',
                    'numb_likes', 'numb_retweet', 'tweet_source']
    rows = [
        [tweet.date, tweet.id, tweet.retweetedTweet, tweet.content,
         tweet.user.username, tweet.replyCount, tweet.likeCount,
         tweet.retweetCount, tweet.source]
        for tweet in sntwitter.TwitterHashtagScraper('DataFestAfrica22').get_items()
    ]
    tweetdf = pd.DataFrame(rows, columns=column_names)
        
    # Add a calendar-date column (time-of-day dropped) for daily grouping.
    tweetdf['tweet_date'] = tweetdf.tweet_datetime.dt.date
    # Quick sanity checks on what was scraped.
    print(tweetdf.shape)
    print(tweetdf.info())

    tweetdf.head()
    # BUG FIX: give the output file a .csv extension and omit the meaningless
    # integer index column (the original wrote an extension-less file with an
    # extra unnamed column).
    tweetdf.to_csv('datafest22_tweeter_data.csv', index=False)
    
    # Daily tweet volume over the collection window.
    daily_counts = tweetdf.tweet_date.value_counts()
    daily_counts.plot();
    plt.xticks(rotation=90);
    
    # Ten most active accounts by tweet count.
    user_counts = tweetdf.username.value_counts()
    user_counts.head(10).plot(kind='bar')

    # Ten accounts with the highest engagement (sorted by likes, then replies).
    engagement = (
        tweetdf.groupby('username')[['numb_likes', 'numb_reply']]
        .sum()
        .sort_values(by=['numb_likes', 'numb_reply'], ascending=False)
    )
    engagement.head(10).plot(kind='bar')

    # Restrict to the ten most prolific tweeters for the per-day breakdown below.
    top10_tweeters = user_counts.head(10).index
    top10_df = tweetdf[tweetdf.username.isin(top10_tweeters)]
    
    # Per-day tweet counts for each of the top-10 tweeters, one panel per user,
    # panels ordered by overall tweet count.
    per_day = (
        top10_df.groupby(['tweet_date', 'username'])
        .count()['tweet_id']
        .reset_index()
    )
    panel_order = tweetdf.username.value_counts()[0:10].index
    grid = sns.relplot(data=per_day, x='tweet_date', y='tweet_id',
                       col='username', col_wrap=2, kind='line',
                       col_order=panel_order)
    grid.set(ylabel='number of tweets ', xlabel='date');
    plt.xticks(rotation=90)
    grid.fig.suptitle('changes in number of tweets with date for top 10 tweeters ', x=.5, y=1);
    
    import re

    # Build a word cloud from the tweet text.
    # BUG FIX: the original stripped only the literal substring 'https', which
    # left '://t.co/...' fragments in the cloud; remove whole URLs instead.
    url_pattern = re.compile(r'https?://\S+')
    wordlist = " ".join(url_pattern.sub('', content) for content in tweetdf.tweet_content)
    word_cloud = WordCloud(collocations=False, background_color='black').generate(wordlist)

    # Display the generated Word Cloud
    plt.imshow(word_cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
    !pip install snscrape
    import os
    import datetime
    import time
    import pandas as pd
    import snscrape.modules.twitter as sntwitter
    import csv
    
    # --- Scrape configuration -------------------------------------------------
    # Hashtag to search for, over the 24-hour window ending today.
    hashtag = "#datafam"
    # FIX: call date.today() once so start/end cannot straddle midnight.
    _today = datetime.date.today()
    start_date = _today - datetime.timedelta(days=1)
    end_date = _today

    # Output CSV name, e.g. "datafam_2023-01-01_2023-01-02.csv".
    # FIX: strip the leading '#' so the file name does not start with '#'
    # (awkward to pass unquoted in shells and URLs).
    output_file = f"{hashtag.lstrip('#')}_{start_date}_{end_date}.csv"

    # snscrape search query: hashtag restricted to [start_date, end_date).
    query = f"{hashtag} since:{start_date} until:{end_date}"
    
    # Scrape every tweet matching the query and stream rows into a CSV file.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "date", "content", "username", "url"])
        for tweet in sntwitter.TwitterSearchScraper(query).get_items():
            writer.writerow([tweet.id, tweet.date, tweet.content,
                             tweet.user.username, tweet.url])

    # BUG FIX: the original ended with time.sleep(86400) under a comment claiming
    # the script would "run again" — but there is no loop, so the process merely
    # slept for 24 hours and then exited. Schedule reruns externally instead
    # (cron / Windows Task Scheduler) rather than blocking here.