# Imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import nltk
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import (
    TfidfVectorizer,
    CountVectorizer,
    ENGLISH_STOP_WORDS,
)
from wordcloud import WordCloud
from functools import reduce
from nltk import word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import glob
path = 'Valerie_Project/'
all_files = glob.glob(path + "*.csv")
all_files
# Download the required NLTK resources once, before the loop
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('words')
nltk.download('maxent_ne_chunker')
nltk.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()

for filename in all_files:
    df = pd.read_csv(filename, index_col=0, header=0)
    # to_string() keeps the full frame; str(df) would truncate long frames
    tokens = nltk.word_tokenize(df.to_string())
    print(tokens)

    # Part-of-speech tag the tokens, then chunk them into named entities
    tagged = nltk.pos_tag(tokens)
    entities = nltk.chunk.ne_chunk(tagged)
    entities.pprint()

    # VADER sentiment scores for the file's text as a whole
    result = sia.polarity_scores(' '.join(tokens))
    resultb = pd.DataFrame(result, index=[0])
    # Append one row per file; write the header only for the first file
    resultb.to_csv('out.csv', mode='a', index=False,
                   header=(filename == all_files[0]))
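
# WordCloud and ENGLISH_STOP_WORDS are imported above but never used. A minimal
# sketch of a word cloud, drawn from the tokens of the last file processed
# (an assumption; the original may have intended one cloud per file):
words = [t.lower() for t in tokens
         if t.isalpha() and t.lower() not in ENGLISH_STOP_WORDS]
wordcloud = WordCloud(width=800, height=400,
                      background_color='white').generate(' '.join(words))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()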
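
# matplotlib and seaborn are imported above, so a small sketch for visualizing
# the accumulated VADER scores; this assumes out.csv holds exactly one row per
# file, written in the same order as all_files:
scores = pd.read_csv('out.csv')
scores['file'] = [f.split('/')[-1] for f in all_files]
sns.barplot(data=scores, x='file', y='compound')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()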
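
# The TfidfVectorizer, LogisticRegression, train_test_split, and metric imports
# suggest a supervised step that never made it into the loop. A minimal sketch,
# with hypothetical texts/labels placeholders (not defined anywhere above):
texts = ["great product", "terrible service", "okay experience", "loved it",
         "awful quality", "fantastic support"]
labels = [1, 0, 0, 1, 0, 1]  # hypothetical sentiment labels

X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.5, random_state=0, stratify=labels)
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

clf = LogisticRegression()
clf.fit(X_train_tfidf, y_train)
preds = clf.predict(X_test_tfidf)
print(accuracy_score(y_test, preds))
ConfusionMatrixDisplay(confusion_matrix(y_test, preds)).plot()
plt.show()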