# "Skip to content" was a copy-paste artifact from the notebook page;
# kept here as a comment so the file parses.
# Start coding
# Imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sn
import nltk
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import (
TfidfVectorizer,
CountVectorizer,
ENGLISH_STOP_WORDS,
)
from wordcloud import WordCloud
from functools import reduce
from nltk import word_tokenize
import glob
# Collect every CSV in the project folder and tokenize each one.
path = 'Valerie_Project/'
# path already ends with '/', so the original "/*.csv" pattern produced a
# double slash; glob tolerates it, but the single-slash pattern is cleaner.
all_files = glob.glob(path + "*.csv")
all_files
# Download the 'punkt' tokenizer model ONCE, not on every loop iteration
# (the original re-downloaded it per file).
nltk.download("punkt")
for filename in all_files:
    df = pd.read_csv(filename, index_col=0, header=0)
    # NOTE(review): str(df) tokenizes pandas' *printed summary* of the frame
    # (including truncation ellipses for large frames), not the raw file
    # contents -- TODO confirm this is intended.
    tokens = nltk.word_tokenize(str(df))
    print(tokens)
# After the loop, df/tokens hold only the LAST file's data; the blocks
# below operate on that last file.
import nltk  # already imported above; kept to preserve the notebook-cell layout
nltk.download('averaged_perceptron_tagger')
# The original called nltk.pos_tag(tokens) twice, discarding the first
# (expensive) result; tag once and keep it.
tagged = nltk.pos_tag(tokens)
import nltk
# Fetch the corpora the named-entity chunker depends on.
for resource in ('words', 'maxent_ne_chunker'):
    nltk.download(resource)
# Group the POS-tagged tokens into a named-entity tree and print it.
entities = nltk.chunk.ne_chunk(tagged)
entities.pprint()
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('vader_lexicon')
# Score the joined token stream with VADER; result is a dict with
# 'neg'/'neu'/'pos'/'compound' keys.
sia = SentimentIntensityAnalyzer()
document = ' '.join(tokens)
result = sia.polarity_scores(document)
import pandas as pd  # already imported above; kept to preserve the notebook-cell layout
import os

# Wrap the VADER score dict in a one-row frame for export.
resultb = pd.DataFrame(result, index=[0])
# The original used mode='a' with header=True, which appended a duplicate
# header row to out.csv on every run; write the header only when the file
# does not exist yet.
resultb.to_csv('out.csv', mode='a', index=False,
               header=not os.path.exists('out.csv'))
#for column in resultb:
#    print(resultb[column])
#print(resultb)