Airline Quality Service Analysis¶
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
!pip install pydrive
Collecting pydrive
  Downloading PyDrive-1.3.1.tar.gz (987kB)
Building wheels for collected packages: pydrive
Successfully built pydrive
Installing collected packages: pyasn1, pyasn1-modules, rsa, cachetools, google-auth, google-auth-httplib2, uritemplate, google-api-python-client, oauth2client, pydrive
Successfully installed cachetools-3.1.1 google-api-python-client-1.7.9 google-auth-1.6.3 google-auth-httplib2-0.0.3 oauth2client-4.1.3 pyasn1-0.4.5 pyasn1-modules-0.2.5 pydrive-1.3.1 rsa-4.0 uritemplate-3.0.0
In [3]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client (this step works only inside Google Colab).
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-3-04913854c332> in <module>
      1 from pydrive.auth import GoogleAuth
      2 from pydrive.drive import GoogleDrive
----> 3 from google.colab import auth
      4 from oauth2client.client import GoogleCredentials
      5 # Authenticate and create the PyDrive client (this step works only inside Google Colab).
ModuleNotFoundError: No module named 'google.colab'
In [ ]:
# Extract the file id from the Drive share link and download the dataset.
link = 'https://drive.google.com/open?id=1tmzZKQKEvxt61TxjHchFfJkpqklVgdzP'
_, file_id = link.split('=')
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('airline.csv')
airline_data = pd.read_csv('airline.csv')
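When the notebook runs outside Colab (as the ModuleNotFoundError above shows), the Drive step can be skipped; a minimal fallback sketch, assuming airline.csv has already been placed next to the notebook:

In [ ]:
# Fallback for a local run: load the CSV directly from disk.
# Assumes 'airline.csv' was downloaded manually into the working directory.
import os
if os.path.exists('airline.csv'):
    airline_data = pd.read_csv('airline.csv')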
In [5]:
airline_data.head(1)
Out[5]:
| | airline_name | link | title | author | author_country | date | content | aircraft | type_traveller | cabin_flown | route | overall_rating | seat_comfort_rating | cabin_staff_rating | food_beverages_rating | inflight_entertainment_rating | ground_service_rating | wifi_connectivity_rating | value_money_rating | recommended |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | adria-airways | /airline-reviews/adria-airways | Adria Airways customer review | D Ito | Germany | 2015-04-10 | Outbound flight FRA/PRN A319. 2 hours 10 min f… | NaN | NaN | Economy | NaN | 7.0 | 4.0 | 4.0 | 4.0 | 0.0 | NaN | NaN | 4.0 | 1 |
Analysing the Dataset¶
In [6]:
round((airline_data.isna().sum())/len(airline_data),2)
Out[6]:
airline_name                     0.00
link                             0.00
title                            0.00
author                           0.00
author_country                   0.04
date                             0.00
content                          0.00
aircraft                         0.97
type_traveller                   0.94
cabin_flown                      0.07
route                            0.94
overall_rating                   0.11
seat_comfort_rating              0.19
cabin_staff_rating               0.19
food_beverages_rating            0.20
inflight_entertainment_rating    0.25
ground_service_rating            0.95
wifi_connectivity_rating         0.99
value_money_rating               0.04
recommended                      0.00
dtype: float64
In [ ]:
# Keep only reviews that have an overall rating.
airline_data = airline_data[airline_data['overall_rating'].notnull()]
In [8]:
round((airline_data.isna().sum())/len(airline_data),2)
Out[8]:
airline_name                     0.00
link                             0.00
title                            0.00
author                           0.00
author_country                   0.02
date                             0.00
content                          0.00
aircraft                         0.97
type_traveller                   0.94
cabin_flown                      0.05
route                            0.94
overall_rating                   0.00
seat_comfort_rating              0.17
cabin_staff_rating               0.17
food_beverages_rating            0.18
inflight_entertainment_rating    0.23
ground_service_rating            0.94
wifi_connectivity_rating         0.98
value_money_rating               0.03
recommended                      0.00
dtype: float64
Calculating Total Reviews for Each Airline¶
In [9]:
airline_names=airline_data.airline_name.unique()
print('Total airlines Considered for Analysis : ',len(airline_names))
total_reviews_each_airline=[]
for i in airline_names:
temp=airline_data[airline_data.airline_name==i]
total_reviews_each_airline.append(len(temp))
result=list(zip(airline_names,total_reviews_each_airline))
print('Total Reviews Analysed : ',sum(total_reviews_each_airline))
Total airlines Considered for Analysis :  357
Total Reviews Analysed :  36861
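The same counts can be read straight off pandas; a minimal equivalent sketch using value_counts:

In [ ]:
# Equivalent one-liner: number of reviews per airline, sorted descending.
review_counts = airline_data.airline_name.value_counts()
print('Total airlines :', review_counts.shape[0])
print('Total reviews  :', int(review_counts.sum()))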
In [10]:
df=pd.DataFrame(result,columns=['Airline_Name','Total_Reviews'])
df=df.sort_values(by='Total_Reviews',ascending=False)
df.head()
Out[10]:
| | Airline_Name | Total_Reviews |
|---|---|---|
| 295 | spirit-airlines | 966 |
| 97 | british-airways | 896 |
| 333 | united-airlines | 839 |
| 20 | air-canada-rouge | 715 |
| 138 | emirates | 690 |
In [11]:
plt.style.use('seaborn')
plt.xlabel('Airlines')
plt.ylabel('Reviews')
plt.bar(df.Airline_Name[:5],df.Total_Reviews[:5],label='Top 5 Airlines')
plt.legend()
Out[11]:
<matplotlib.legend.Legend at 0x7f1609e78ac8>
Finding Polarity of Each Review¶
In [ ]:
from textblob import TextBlob
In [ ]:
# TextBlob polarity for every review; numlist is a running 1-based index used for plotting.
result1 = []
numlist = []
for num, review in enumerate(airline_data.content, start=1):
    analysis = TextBlob(review)
    result1.append(analysis.polarity)
    numlist.append(num)
result1 = np.array(result1)
Plotting Polarity¶
In [16]:
plt.style.use('seaborn')
plt.scatter(numlist,result1,label='Polarity')
plt.xlabel('Reviews')
plt.ylabel('Polarity')
plt.legend()
Out[16]:
<matplotlib.legend.Legend at 0x7f15ffe53e10>
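A histogram gives a clearer view of how the polarities are distributed than the index-ordered scatter; a small complementary sketch:

In [ ]:
# Distribution of TextBlob polarities across all reviews.
plt.hist(result1, bins=50, color='steelblue')
plt.xlabel('Polarity')
plt.ylabel('Number of reviews')
plt.show()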
Generating Word Cloud¶
In [ ]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
text=''
for i in airline_data.content:
text=text+i+' '
In [ ]:
wordcloud = WordCloud().generate(text)
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
In [ ]:
from PIL import Image
import requests
from io import BytesIO
response = requests.get("http://www.pngmart.com/files/7/Modern-Plane-PNG-HD.png")
mask = np.array(Image.open(BytesIO(response.content)))
wordcloud_fra = WordCloud(background_color="white", mode="RGBA", max_words=1000, mask=mask).generate(text)
# create coloring from image
image_colors = ImageColorGenerator(mask)
plt.figure(figsize=[16,16])
plt.imshow(wordcloud_fra.recolor(color_func=image_colors), interpolation="bilinear")
plt.axis("off")
Out[ ]:
(-0.5, 2718.5, 944.5, -0.5)
Providing Sentiment Value to Each Review According to Rating¶
In [ ]:
# Binary sentiment from the overall rating: ratings 0-5 count as negative (0), higher as positive (1).
sentiment = []
for i in airline_data.overall_rating:
    if 0 <= i <= 5:
        sentiment.append(0)
    else:
        sentiment.append(1)
Sentiment from polarity¶
In [ ]:
# Three-way sentiment from TextBlob polarity: positive -> 1, negative -> -1, exactly zero -> 0.
sentiment = []
for i in result1:
    if i > 0:
        sentiment.append(1)
    elif i < 0:
        sentiment.append(-1)
    else:
        sentiment.append(0)
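The same mapping can be written without a loop; a minimal equivalent sketch using np.sign (which returns 1, -1, or 0):

In [ ]:
# Vectorised equivalent of the loop above.
sentiment = np.sign(result1).astype(int).tolist()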
In [ ]:
new_airline_data=airline_data.copy()
In [ ]:
new_airline_data['sentiment']=sentiment
In [ ]:
### Shuffling the Dataset for Training ###
from sklearn.utils import shuffle
new_airline_data=shuffle(new_airline_data)
In [94]:
positive_sentiment_count=new_airline_data[new_airline_data['sentiment']==1]
negative_sentiment_count=new_airline_data[new_airline_data['sentiment']==-1]
neutral_sentiment_count=new_airline_data[new_airline_data['sentiment']==0]
temp_array=[len(positive_sentiment_count),len(negative_sentiment_count),len(neutral_sentiment_count)]
x_axis_labels=['Positive_sentiment','Negative_sentiment','Neutral_sentiment']
plt.bar(x_axis_labels,temp_array,color=('green','red','blue'),width=(0.2,0.2,0.2))
Out[94]:
<BarContainer object of 3 artists>
Logistic Regression¶
In [ ]:
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
In [ ]:
model=LogisticRegression()
vectorizer=CountVectorizer(ngram_range=(1,2))
x_l=vectorizer.fit_transform(new_airline_data.content.values)
In [23]:
# Train on the first 29,488 reviews (~80% of the shuffled data); the remaining rows are held out for testing.
model.fit(x_l[:29488],new_airline_data.sentiment[:29488].values)
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) /usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning. "this warning.", FutureWarning)
Out[23]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=None, penalty='l2', random_state=None, solver='warn', tol=0.0001, verbose=0, warm_start=False)
In [24]:
model.score(x_l[29488:],new_airline_data.sentiment[29488:].values)
Out[24]:
0.9178082191780822
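Accuracy alone can be misleading when positive reviews dominate, so per-class behaviour is worth checking too; a minimal sketch using sklearn.metrics on the same held-out slice:

In [ ]:
from sklearn.metrics import classification_report, confusion_matrix

y_true = new_airline_data.sentiment[29488:].values
y_pred = model.predict(x_l[29488:])
print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))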
In [106]:
new_airline_data.content[0]
Out[106]:
"Outbound flight FRA/PRN A319. 2 hours 10 min flight. I thought drinks/snacks for sale but sandwich soft drinks were served complimentary. Inbound flights SKP/LJU/FRA CRJ900. each 1 hour 30 min flight. Skyshop menu was in a seat pocket and drinks/snacks were for sale. All flight crews were friendly. Security check at the Ljubljana airport for transit passengers was chaos however it's possible to go to a gate within 30min."
In [ ]:
y_predict=model.predict(x_l[29488:])
In [ ]:
# 1-based index for each held-out review, used as the x-axis in the plots below.
x_axis = list(range(1, len(new_airline_data[29488:]) + 1))
In [28]:
plt.scatter(x_axis,new_airline_data.sentiment[29488:],color='red',label='actual')
plt.scatter(x_axis,y_predict,label='predicted')
plt.legend(loc='best')
Out[28]:
<matplotlib.legend.Legend at 0x7f15ffe95320>
Linear Support Vector Machine¶
In [ ]:
from sklearn.svm import LinearSVC
In [ ]:
clf = LinearSVC(random_state=42, tol=1e-5)
In [31]:
clf.fit(x_l[:29488],new_airline_data.sentiment[:29488])
/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. "the number of iterations.", ConvergenceWarning)
Out[31]:
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, multi_class='ovr', penalty='l2', random_state=42, tol=1e-05, verbose=0)
In [32]:
clf.score(x_l[29488:],new_airline_data.sentiment[29488:])
Out[32]:
0.9152312491523125
In [ ]:
y_predict1=clf.predict(x_l[29488:])
In [71]:
plt.scatter(x_axis,new_airline_data.sentiment[29488:],color='red',label='actual')
plt.scatter(x_axis,y_predict1,label='predicted')
plt.legend()
Out[71]:
<matplotlib.legend.Legend at 0x7f15ec83fb70>
Aggregating Sentiment and Average Rating per Airline¶
In [ ]:
t=new_airline_data.copy()
In [ ]:
# For every airline, count positive/neutral/negative reviews and compute the mean overall rating.
pos_sentiment = []
neg_sentiment = []
net_sentiment = []
avg_rating = []
for i in airline_names:
    tempdf = t[t['airline_name'] == i]
    pos_sentiment.append(len(tempdf[tempdf['sentiment'] == 1]))
    neg_sentiment.append(len(tempdf[tempdf['sentiment'] == -1]))
    net_sentiment.append(len(tempdf[tempdf['sentiment'] == 0]))
    avg_rating.append(tempdf.overall_rating.mean())
clustered_data = pd.DataFrame(
    list(zip(airline_names, avg_rating, pos_sentiment, net_sentiment, neg_sentiment)),
    columns=['airline_name', 'average_rating', 'pos_sentiment', 'net_sentiment', 'neg_sentiment'])
In [ ]:
clustered_data=clustered_data.sort_values(by=['pos_sentiment','average_rating'],kind='mergesort',ascending=False)
In [39]:
clustered_data.head()
Out[39]:
| | airline_name | average_rating | pos_sentiment | net_sentiment | neg_sentiment |
|---|---|---|---|---|---|
| 97 | british-airways | 5.881696 | 703 | 2 | 191 |
| 138 | emirates | 6.246377 | 558 | 1 | 131 |
| 295 | spirit-airlines | 2.902692 | 556 | 15 | 395 |
| 333 | united-airlines | 3.356377 | 523 | 5 | 311 |
| 215 | lufthansa | 6.993333 | 511 | 1 | 88 |
In [47]:
plt.figure(figsize=(26,26))
plt.bar(clustered_data.airline_name.head(100).values,clustered_data.pos_sentiment.head(100).values,label='Positive Sentiment',color='green')
plt.bar(clustered_data.airline_name.head(100).values,clustered_data.neg_sentiment.head(100).values,label='Negative Sentiment',color='red')
plt.bar(clustered_data.airline_name.head(100).values,clustered_data.net_sentiment.head(100).values,label='Neutral Sentiment',color='black')
plt.xticks(rotation=90)
plt.legend()
Out[47]:
<matplotlib.legend.Legend at 0x7f15f7f3bfd0>
Preprocessing Reviews and Removing Stop Words¶
In [ ]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer
import string
from nltk.tokenize import word_tokenize
In [49]:
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
Out[49]:
True
In [50]:
nltk.download('punkt')
nltk.download('wordnet')
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
Out[50]:
True
In [ ]:
stop = set(stopwords.words("english"))
In [ ]:
def preprocessing(text):
    # Tokenise, drop English stop words, and lemmatise the remaining tokens.
    le = WordNetLemmatizer()
    words = word_tokenize(text)
    words = [x for x in words if x not in stop]
    words = [le.lemmatize(x) for x in words]
    return " ".join(words)
In [ ]:
reviews2 = [preprocessing(x) for x in new_airline_data.content]
In [ ]:
new_airline_data['reviews_processed']=reviews2
Logistic Regression on Processed Data¶
In [ ]:
logic_model=LogisticRegression()
vectorizer_p=CountVectorizer(ngram_range=(1,2))
x_l_1=vectorizer_p.fit_transform(new_airline_data.reviews_processed.values)
In [56]:
logic_model.fit(x_l_1[:29488],new_airline_data.sentiment[:29488].values)
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning) /usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning. "this warning.", FutureWarning)
Out[56]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=None, penalty='l2', random_state=None, solver='warn', tol=0.0001, verbose=0, warm_start=False)
In [57]:
logic_model.score(x_l_1[29488:],new_airline_data.sentiment[29488:].values)
Out[57]:
0.9115692391156924
Implementing Linear SVM¶
In [ ]:
clf1 = LinearSVC(random_state=42, tol=1e-5)
In [59]:
clf1.fit(x_l_1[:29488],new_airline_data.sentiment[:29488])
/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. "the number of iterations.", ConvergenceWarning)
Out[59]:
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, multi_class='ovr', penalty='l2', random_state=42, tol=1e-05, verbose=0)
In [60]:
clf1.score(x_l_1[29488:],new_airline_data.sentiment[29488:])
Out[60]:
0.9092635290926353
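To compare the four bag-of-words models side by side, the held-out accuracies can be collected into one table; a small sketch reusing the fitted estimators above:

In [ ]:
# Test accuracy of each vectoriser/classifier combination on the same held-out slice.
scores = pd.DataFrame({
    'model': ['LogReg (raw text)', 'LinearSVC (raw text)',
              'LogReg (processed text)', 'LinearSVC (processed text)'],
    'test_accuracy': [
        model.score(x_l[29488:], new_airline_data.sentiment[29488:]),
        clf.score(x_l[29488:], new_airline_data.sentiment[29488:]),
        logic_model.score(x_l_1[29488:], new_airline_data.sentiment[29488:]),
        clf1.score(x_l_1[29488:], new_airline_data.sentiment[29488:]),
    ],
})
scores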
Working With Neural Networks¶
In [ ]:
new_airline_data.head(1)
Out[ ]:
| | airline_name | link | title | author | author_country | date | content | aircraft | type_traveller | cabin_flown | … | seat_comfort_rating | cabin_staff_rating | food_beverages_rating | inflight_entertainment_rating | ground_service_rating | wifi_connectivity_rating | value_money_rating | recommended | sentiment | reviews_processed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 31578 | singapore-airlines | /airline-reviews/singapore-airlines | Singapore Airlines customer review | A Turner | Australia | 2014-01-09 | Flew economy from Frankfurt to Singapore. Food… | NaN | NaN | Economy | … | 4.0 | 5.0 | 4.0 | 4.0 | NaN | NaN | 5.0 | 1 | 1 | Flew economy Frankfurt Singapore . Food decent… |

1 rows × 22 columns
In [ ]:
reviews_array=np.array(new_airline_data.content)
In [ ]:
sentiment_array=np.array(new_airline_data.sentiment)
In [ ]:
from keras.datasets import imdb
from keras.layers import Dense,Conv1D,MaxPool1D,Embedding,Flatten,Dropout,GRU,LSTM
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OneHotEncoder
from keras.optimizers import Adam
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Using TensorFlow backend.
In [ ]:
token = Tokenizer()
token.fit_on_texts(reviews_array)
vocab_size = len(token.word_index) + 1   # +1 because index 0 is reserved for padding
print(vocab_size)
37678
In [ ]:
# Average review length in characters, then truncate every review to that length.
l = 0
for i in reviews_array:
    l += len(i)
avg_length = l / len(reviews_array)
review_training = [x[:int(avg_length)] for x in reviews_array]
In [ ]:
# encoded = token.texts_to_sequences(reviews_array)
# l = []
# for i in encoded:
# l.append(len(i))
# print(l/len(encoded))
In [ ]:
# Integer-encode each review, then pad/truncate every sequence to 38 tokens.
encoded = token.texts_to_sequences(reviews_array)
padded_docs = pad_sequences(encoded, maxlen=38, padding='post')
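A quick shape check confirms the result: one row per review, 38 token ids per row.

In [ ]:
print(padded_docs.shape)   # expected (36861, 38)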
LSTM¶
In [ ]:
n_model1 = Sequential()
n_model1.add(Embedding(vocab_size, 64, input_length=38))
n_model1.add(LSTM(64, return_sequences=True))
n_model1.add(Dropout(0.5))
n_model1.add(LSTM(64, return_sequences=False))
n_model1.add(Dropout(0.5))
n_model1.add(Dense(1, activation="sigmoid"))
n_model1.summary()
n_model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_2 (Embedding)      (None, 38, 64)            2411392
lstm_3 (LSTM)                (None, 38, 64)            33024
dropout_3 (Dropout)          (None, 38, 64)            0
lstm_4 (LSTM)                (None, 64)                33024
dropout_4 (Dropout)          (None, 64)                0
dense_2 (Dense)              (None, 1)                 65
=================================================================
Total params: 2,477,505
Trainable params: 2,477,505
Non-trainable params: 0
_________________________________________________________________
In [ ]:
hist = n_model1.fit(padded_docs,sentiment_array,epochs=5,batch_size=100,validation_split=0.2)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating: Use tf.cast instead.
Train on 29488 samples, validate on 7373 samples
Epoch 1/5
29488/29488 [==============================] - 40s 1ms/step - loss: 0.4716 - acc: 0.7725 - val_loss: 0.4018 - val_acc: 0.8283
Epoch 2/5
29488/29488 [==============================] - 38s 1ms/step - loss: 0.3562 - acc: 0.8549 - val_loss: 0.4038 - val_acc: 0.8204
Epoch 3/5
29488/29488 [==============================] - 37s 1ms/step - loss: 0.2973 - acc: 0.8836 - val_loss: 0.4231 - val_acc: 0.8142
Epoch 4/5
29488/29488 [==============================] - 37s 1ms/step - loss: 0.2439 - acc: 0.9078 - val_loss: 0.4939 - val_acc: 0.8013
Epoch 5/5
29488/29488 [==============================] - 36s 1ms/step - loss: 0.1928 - acc: 0.9301 - val_loss: 0.5099 - val_acc: 0.7991
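Training accuracy keeps rising while validation accuracy falls after the first epoch, a sign of overfitting; plotting the history (as is done later for the CNN) makes this easy to see:

In [ ]:
plt.plot(hist.history['acc'], label='train acc')
plt.plot(hist.history['val_acc'], label='val acc')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()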
CNN¶
In [ ]:
n_model2 = Sequential()
n_model2.add(Embedding(37678,100,input_length=38))
n_model2.add(Conv1D(filters=64,kernel_size=3))
n_model2.add(MaxPool1D(pool_size=3))
n_model2.add(Flatten())
n_model2.add(Dense(64,activation="relu"))
n_model2.add(Dropout(rate = 0.2))
n_model2.add(Dense(1,activation="sigmoid"))
n_model2.summary()
adm = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
n_model2.compile(loss='binary_crossentropy', optimizer=adm, metrics=['accuracy'])
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_5 (Embedding)      (None, 38, 100)           3767800
conv1d_3 (Conv1D)            (None, 36, 64)            19264
max_pooling1d_3 (MaxPooling1 (None, 12, 64)            0
flatten_3 (Flatten)          (None, 768)               0
dense_7 (Dense)              (None, 64)                49216
dropout_7 (Dropout)          (None, 64)                0
dense_8 (Dense)              (None, 1)                 65
=================================================================
Total params: 3,836,345
Trainable params: 3,836,345
Non-trainable params: 0
_________________________________________________________________
In [ ]:
hist = n_model2.fit(padded_docs,sentiment_array,epochs=100,batch_size=100,validation_split=0.1)
Train on 33174 samples, validate on 3687 samples Epoch 1/100 33174/33174 [==============================] - 23s 688us/step - loss: 0.6859 - acc: 0.5677 - val_loss: 0.6800 - val_acc: 0.5837 Epoch 2/100 33174/33174 [==============================] - 22s 654us/step - loss: 0.6750 - acc: 0.5908 - val_loss: 0.6750 - val_acc: 0.5837 Epoch 3/100 33174/33174 [==============================] - 22s 660us/step - loss: 0.6706 - acc: 0.5908 - val_loss: 0.6711 - val_acc: 0.5837 Epoch 4/100 33174/33174 [==============================] - 22s 666us/step - loss: 0.6650 - acc: 0.5908 - val_loss: 0.6655 - val_acc: 0.5837 Epoch 5/100 33174/33174 [==============================] - 22s 650us/step - loss: 0.6575 - acc: 0.5908 - val_loss: 0.6567 - val_acc: 0.5837 Epoch 6/100 33174/33174 [==============================] - 22s 650us/step - loss: 0.6460 - acc: 0.5915 - val_loss: 0.6425 - val_acc: 0.5891 Epoch 7/100 33174/33174 [==============================] - 23s 680us/step - loss: 0.6284 - acc: 0.6121 - val_loss: 0.6218 - val_acc: 0.6222 Epoch 8/100 33174/33174 [==============================] - 21s 643us/step - loss: 0.6046 - acc: 0.6722 - val_loss: 0.5949 - val_acc: 0.6813 Epoch 9/100 33174/33174 [==============================] - 22s 653us/step - loss: 0.5754 - acc: 0.7240 - val_loss: 0.5653 - val_acc: 0.7266 Epoch 10/100 33174/33174 [==============================] - 22s 653us/step - loss: 0.5463 - acc: 0.7511 - val_loss: 0.5368 - val_acc: 0.7513 Epoch 11/100 33174/33174 [==============================] - 22s 668us/step - loss: 0.5190 - acc: 0.7679 - val_loss: 0.5122 - val_acc: 0.7624 Epoch 12/100 33174/33174 [==============================] - 22s 654us/step - loss: 0.4961 - acc: 0.7791 - val_loss: 0.4915 - val_acc: 0.7711 Epoch 13/100 33174/33174 [==============================] - 22s 650us/step - loss: 0.4740 - acc: 0.7915 - val_loss: 0.4739 - val_acc: 0.7792 Epoch 14/100 33174/33174 [==============================] - 23s 684us/step - loss: 0.4568 - acc: 0.7988 - val_loss: 0.4589 - val_acc: 0.7920 Epoch 15/100 33174/33174 [==============================] - 22s 662us/step - loss: 0.4407 - acc: 0.8070 - val_loss: 0.4464 - val_acc: 0.7979 Epoch 16/100 33174/33174 [==============================] - 22s 652us/step - loss: 0.4279 - acc: 0.8146 - val_loss: 0.4363 - val_acc: 0.8042 Epoch 17/100 33174/33174 [==============================] - 22s 651us/step - loss: 0.4152 - acc: 0.8216 - val_loss: 0.4278 - val_acc: 0.8082 Epoch 18/100 33174/33174 [==============================] - 22s 671us/step - loss: 0.4042 - acc: 0.8270 - val_loss: 0.4205 - val_acc: 0.8142 Epoch 19/100 33174/33174 [==============================] - 21s 645us/step - loss: 0.3949 - acc: 0.8335 - val_loss: 0.4149 - val_acc: 0.8150 Epoch 20/100 33174/33174 [==============================] - 22s 667us/step - loss: 0.3867 - acc: 0.8370 - val_loss: 0.4102 - val_acc: 0.8205 Epoch 21/100 33174/33174 [==============================] - 23s 692us/step - loss: 0.3798 - acc: 0.8412 - val_loss: 0.4065 - val_acc: 0.8183 Epoch 22/100 33174/33174 [==============================] - 23s 685us/step - loss: 0.3734 - acc: 0.8447 - val_loss: 0.4034 - val_acc: 0.8205 Epoch 23/100 33174/33174 [==============================] - 22s 677us/step - loss: 0.3662 - acc: 0.8477 - val_loss: 0.4008 - val_acc: 0.8221 Epoch 24/100 33174/33174 [==============================] - 23s 681us/step - loss: 0.3606 - acc: 0.8503 - val_loss: 0.3987 - val_acc: 0.8226 Epoch 25/100 33174/33174 [==============================] - 23s 684us/step - loss: 0.3556 - acc: 0.8527 - val_loss: 0.3968 - 
val_acc: 0.8223 Epoch 26/100 33174/33174 [==============================] - 23s 683us/step - loss: 0.3503 - acc: 0.8571 - val_loss: 0.3956 - val_acc: 0.8237 Epoch 27/100 33174/33174 [==============================] - 22s 663us/step - loss: 0.3447 - acc: 0.8585 - val_loss: 0.3943 - val_acc: 0.8245 Epoch 28/100 33174/33174 [==============================] - 22s 675us/step - loss: 0.3403 - acc: 0.8598 - val_loss: 0.3934 - val_acc: 0.8253 Epoch 29/100 33174/33174 [==============================] - 22s 672us/step - loss: 0.3354 - acc: 0.8632 - val_loss: 0.3929 - val_acc: 0.8270 Epoch 30/100 33174/33174 [==============================] - 22s 656us/step - loss: 0.3318 - acc: 0.8653 - val_loss: 0.3922 - val_acc: 0.8248 Epoch 31/100 33174/33174 [==============================] - 22s 654us/step - loss: 0.3279 - acc: 0.8658 - val_loss: 0.3919 - val_acc: 0.8242 Epoch 32/100 33174/33174 [==============================] - 22s 668us/step - loss: 0.3234 - acc: 0.8707 - val_loss: 0.3922 - val_acc: 0.8256 Epoch 33/100 33174/33174 [==============================] - 22s 669us/step - loss: 0.3186 - acc: 0.8708 - val_loss: 0.3918 - val_acc: 0.8248 Epoch 34/100 33174/33174 [==============================] - 22s 657us/step - loss: 0.3145 - acc: 0.8739 - val_loss: 0.3919 - val_acc: 0.8242 Epoch 35/100 33174/33174 [==============================] - 21s 647us/step - loss: 0.3104 - acc: 0.8756 - val_loss: 0.3921 - val_acc: 0.8248 Epoch 36/100 33174/33174 [==============================] - 22s 670us/step - loss: 0.3074 - acc: 0.8781 - val_loss: 0.3926 - val_acc: 0.8256 Epoch 37/100 33174/33174 [==============================] - 22s 667us/step - loss: 0.3032 - acc: 0.8789 - val_loss: 0.3929 - val_acc: 0.8256 Epoch 38/100 33174/33174 [==============================] - 22s 666us/step - loss: 0.2990 - acc: 0.8813 - val_loss: 0.3934 - val_acc: 0.8229 Epoch 39/100 33174/33174 [==============================] - 22s 661us/step - loss: 0.2967 - acc: 0.8813 - val_loss: 0.3940 - val_acc: 0.8240 Epoch 40/100 33174/33174 [==============================] - 23s 684us/step - loss: 0.2921 - acc: 0.8849 - val_loss: 0.3948 - val_acc: 0.8223 Epoch 41/100 33174/33174 [==============================] - 22s 664us/step - loss: 0.2881 - acc: 0.8864 - val_loss: 0.3957 - val_acc: 0.8226 Epoch 42/100 33174/33174 [==============================] - 23s 684us/step - loss: 0.2840 - acc: 0.8885 - val_loss: 0.3967 - val_acc: 0.8215 Epoch 43/100 33174/33174 [==============================] - 22s 670us/step - loss: 0.2811 - acc: 0.8896 - val_loss: 0.3976 - val_acc: 0.8221 Epoch 44/100 33174/33174 [==============================] - 23s 679us/step - loss: 0.2775 - acc: 0.8914 - val_loss: 0.3989 - val_acc: 0.8199 Epoch 45/100 33174/33174 [==============================] - 22s 657us/step - loss: 0.2734 - acc: 0.8927 - val_loss: 0.4007 - val_acc: 0.8186 Epoch 46/100 33174/33174 [==============================] - 22s 652us/step - loss: 0.2703 - acc: 0.8943 - val_loss: 0.4015 - val_acc: 0.8169 Epoch 47/100 33174/33174 [==============================] - 22s 663us/step - loss: 0.2668 - acc: 0.8964 - val_loss: 0.4026 - val_acc: 0.8183 Epoch 48/100 33174/33174 [==============================] - 22s 659us/step - loss: 0.2632 - acc: 0.8978 - val_loss: 0.4041 - val_acc: 0.8183 Epoch 49/100 33174/33174 [==============================] - 21s 645us/step - loss: 0.2603 - acc: 0.8988 - val_loss: 0.4057 - val_acc: 0.8172 Epoch 50/100 33174/33174 [==============================] - 21s 640us/step - loss: 0.2564 - acc: 0.9022 - val_loss: 0.4075 - val_acc: 0.8161 Epoch 51/100 
33174/33174 [==============================] - 22s 674us/step - loss: 0.2532 - acc: 0.9034 - val_loss: 0.4093 - val_acc: 0.8158 Epoch 52/100 33174/33174 [==============================] - 22s 673us/step - loss: 0.2501 - acc: 0.9053 - val_loss: 0.4108 - val_acc: 0.8172 Epoch 53/100 33174/33174 [==============================] - 22s 664us/step - loss: 0.2465 - acc: 0.9067 - val_loss: 0.4134 - val_acc: 0.8150 Epoch 54/100 33174/33174 [==============================] - 22s 665us/step - loss: 0.2426 - acc: 0.9076 - val_loss: 0.4148 - val_acc: 0.8164 Epoch 55/100 33174/33174 [==============================] - 23s 683us/step - loss: 0.2399 - acc: 0.9095 - val_loss: 0.4167 - val_acc: 0.8164 Epoch 56/100 33174/33174 [==============================] - 23s 690us/step - loss: 0.2368 - acc: 0.9101 - val_loss: 0.4192 - val_acc: 0.8153 Epoch 57/100 33174/33174 [==============================] - 22s 664us/step - loss: 0.2327 - acc: 0.9132 - val_loss: 0.4212 - val_acc: 0.8153 Epoch 58/100 33174/33174 [==============================] - 22s 676us/step - loss: 0.2298 - acc: 0.9140 - val_loss: 0.4236 - val_acc: 0.8156 Epoch 59/100 33174/33174 [==============================] - 22s 658us/step - loss: 0.2268 - acc: 0.9156 - val_loss: 0.4259 - val_acc: 0.8153 Epoch 60/100 33174/33174 [==============================] - 22s 664us/step - loss: 0.2242 - acc: 0.9170 - val_loss: 0.4283 - val_acc: 0.8142 Epoch 61/100 33174/33174 [==============================] - 22s 668us/step - loss: 0.2204 - acc: 0.9185 - val_loss: 0.4307 - val_acc: 0.8145 Epoch 62/100 33174/33174 [==============================] - 23s 690us/step - loss: 0.2169 - acc: 0.9201 - val_loss: 0.4336 - val_acc: 0.8148 Epoch 63/100 33174/33174 [==============================] - 22s 678us/step - loss: 0.2136 - acc: 0.9219 - val_loss: 0.4358 - val_acc: 0.8120 Epoch 64/100 33174/33174 [==============================] - 22s 671us/step - loss: 0.2109 - acc: 0.9223 - val_loss: 0.4385 - val_acc: 0.8126 Epoch 65/100 33174/33174 [==============================] - 23s 681us/step - loss: 0.2073 - acc: 0.9254 - val_loss: 0.4413 - val_acc: 0.8126 Epoch 66/100 33174/33174 [==============================] - 22s 664us/step - loss: 0.2051 - acc: 0.9250 - val_loss: 0.4445 - val_acc: 0.8134 Epoch 67/100 33174/33174 [==============================] - 22s 652us/step - loss: 0.2010 - acc: 0.9280 - val_loss: 0.4474 - val_acc: 0.8126 Epoch 68/100 33174/33174 [==============================] - 21s 646us/step - loss: 0.1981 - acc: 0.9274 - val_loss: 0.4504 - val_acc: 0.8118 Epoch 69/100 33174/33174 [==============================] - 22s 662us/step - loss: 0.1952 - acc: 0.9291 - val_loss: 0.4537 - val_acc: 0.8112 Epoch 70/100 33174/33174 [==============================] - 22s 668us/step - loss: 0.1919 - acc: 0.9320 - val_loss: 0.4570 - val_acc: 0.8104 Epoch 71/100 33174/33174 [==============================] - 21s 635us/step - loss: 0.1896 - acc: 0.9337 - val_loss: 0.4603 - val_acc: 0.8110 Epoch 72/100 33174/33174 [==============================] - 21s 638us/step - loss: 0.1858 - acc: 0.9354 - val_loss: 0.4644 - val_acc: 0.8115 Epoch 73/100 33174/33174 [==============================] - 22s 662us/step - loss: 0.1834 - acc: 0.9362 - val_loss: 0.4682 - val_acc: 0.8101 Epoch 74/100 33174/33174 [==============================] - 21s 640us/step - loss: 0.1808 - acc: 0.9371 - val_loss: 0.4719 - val_acc: 0.8080 Epoch 75/100 33174/33174 [==============================] - 21s 636us/step - loss: 0.1780 - acc: 0.9371 - val_loss: 0.4741 - val_acc: 0.8099 Epoch 76/100 33174/33174 
[==============================] - 22s 667us/step - loss: 0.1748 - acc: 0.9397 - val_loss: 0.4778 - val_acc: 0.8088 Epoch 77/100 33174/33174 [==============================] - 22s 662us/step - loss: 0.1718 - acc: 0.9406 - val_loss: 0.4817 - val_acc: 0.8074 Epoch 78/100 33174/33174 [==============================] - 21s 648us/step - loss: 0.1689 - acc: 0.9425 - val_loss: 0.4855 - val_acc: 0.8069 Epoch 79/100 33174/33174 [==============================] - 22s 653us/step - loss: 0.1665 - acc: 0.9440 - val_loss: 0.4891 - val_acc: 0.8061 Epoch 80/100 33174/33174 [==============================] - 22s 677us/step - loss: 0.1640 - acc: 0.9447 - val_loss: 0.4935 - val_acc: 0.8050 Epoch 81/100 33174/33174 [==============================] - 22s 666us/step - loss: 0.1610 - acc: 0.9461 - val_loss: 0.4976 - val_acc: 0.8034 Epoch 82/100 33174/33174 [==============================] - 22s 654us/step - loss: 0.1590 - acc: 0.9472 - val_loss: 0.5014 - val_acc: 0.8031 Epoch 83/100 33174/33174 [==============================] - 22s 665us/step - loss: 0.1558 - acc: 0.9482 - val_loss: 0.5052 - val_acc: 0.8028 Epoch 84/100 33174/33174 [==============================] - 23s 688us/step - loss: 0.1535 - acc: 0.9489 - val_loss: 0.5103 - val_acc: 0.8007 Epoch 85/100 33174/33174 [==============================] - 21s 647us/step - loss: 0.1508 - acc: 0.9511 - val_loss: 0.5143 - val_acc: 0.8009 Epoch 86/100 33174/33174 [==============================] - 22s 652us/step - loss: 0.1480 - acc: 0.9518 - val_loss: 0.5182 - val_acc: 0.8007 Epoch 87/100 33174/33174 [==============================] - 22s 672us/step - loss: 0.1459 - acc: 0.9529 - val_loss: 0.5224 - val_acc: 0.7996 Epoch 88/100 33174/33174 [==============================] - 22s 669us/step - loss: 0.1430 - acc: 0.9542 - val_loss: 0.5272 - val_acc: 0.7977 Epoch 89/100 33174/33174 [==============================] - 22s 659us/step - loss: 0.1406 - acc: 0.9550 - val_loss: 0.5316 - val_acc: 0.7979 Epoch 90/100 33174/33174 [==============================] - 22s 672us/step - loss: 0.1379 - acc: 0.9561 - val_loss: 0.5375 - val_acc: 0.7979 Epoch 91/100 33174/33174 [==============================] - 23s 680us/step - loss: 0.1356 - acc: 0.9570 - val_loss: 0.5415 - val_acc: 0.7960 Epoch 92/100 33174/33174 [==============================] - 22s 663us/step - loss: 0.1328 - acc: 0.9592 - val_loss: 0.5453 - val_acc: 0.7947 Epoch 93/100 33174/33174 [==============================] - 22s 667us/step - loss: 0.1304 - acc: 0.9597 - val_loss: 0.5511 - val_acc: 0.7947 Epoch 94/100 33174/33174 [==============================] - 22s 665us/step - loss: 0.1282 - acc: 0.9607 - val_loss: 0.5572 - val_acc: 0.7917 Epoch 95/100 33174/33174 [==============================] - 22s 673us/step - loss: 0.1261 - acc: 0.9614 - val_loss: 0.5609 - val_acc: 0.7917 Epoch 96/100 33174/33174 [==============================] - 21s 647us/step - loss: 0.1240 - acc: 0.9620 - val_loss: 0.5661 - val_acc: 0.7917 Epoch 97/100 33174/33174 [==============================] - 22s 669us/step - loss: 0.1207 - acc: 0.9638 - val_loss: 0.5713 - val_acc: 0.7909 Epoch 98/100 33174/33174 [==============================] - 23s 687us/step - loss: 0.1189 - acc: 0.9644 - val_loss: 0.5763 - val_acc: 0.7890 Epoch 99/100 33174/33174 [==============================] - 22s 659us/step - loss: 0.1165 - acc: 0.9647 - val_loss: 0.5817 - val_acc: 0.7890 Epoch 100/100 33174/33174 [==============================] - 22s 661us/step - loss: 0.1144 - acc: 0.9665 - val_loss: 0.5870 - val_acc: 0.7879
In [ ]:
plt.plot(hist.history["acc"],label="acc")
plt.plot(hist.history["val_acc"],label="val")
plt.legend()
Out[ ]:
<matplotlib.legend.Legend at 0x7fa03f688400>
In [ ]:
model_conv = Sequential()
model_conv.add(Embedding(vocab_size, 100, input_length=38))
model_conv.add(Dropout(0.2))
model_conv.add(Conv1D(64, 5, activation='relu'))
model_conv.add(MaxPool1D(pool_size=4))
model_conv.add(LSTM(100))
model_conv.add(Dense(1, activation='sigmoid'))
adm = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model_conv.compile(loss='binary_crossentropy', optimizer=adm, metrics=['accuracy'])
model_conv.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_7 (Embedding)      (None, 38, 100)           3767800
dropout_9 (Dropout)          (None, 38, 100)           0
conv1d_5 (Conv1D)            (None, 34, 64)            32064
max_pooling1d_5 (MaxPooling1 (None, 8, 64)             0
lstm_6 (LSTM)                (None, 100)               66000
dense_10 (Dense)             (None, 1)                 101
=================================================================
Total params: 3,865,965
Trainable params: 3,865,965
Non-trainable params: 0
_________________________________________________________________
In [ ]:
model_conv.fit(padded_docs,sentiment_array, validation_split=0.2, epochs = 10)
Train on 29488 samples, validate on 7373 samples
Epoch 1/10
29488/29488 [==============================] - 73s 2ms/step - loss: 0.5655 - acc: 0.6930 - val_loss: 0.4516 - val_acc: 0.7951
Epoch 2/10
29488/29488 [==============================] - 71s 2ms/step - loss: 0.4169 - acc: 0.8122 - val_loss: 0.4189 - val_acc: 0.8088
Epoch 3/10
29488/29488 [==============================] - 72s 2ms/step - loss: 0.3694 - acc: 0.8420 - val_loss: 0.4149 - val_acc: 0.8150
Epoch 4/10
29488/29488 [==============================] - 70s 2ms/step - loss: 0.3337 - acc: 0.8600 - val_loss: 0.4201 - val_acc: 0.8107
Epoch 5/10
29488/29488 [==============================] - 71s 2ms/step - loss: 0.3034 - acc: 0.8761 - val_loss: 0.4287 - val_acc: 0.8085
Epoch 6/10
29488/29488 [==============================] - 71s 2ms/step - loss: 0.2734 - acc: 0.8903 - val_loss: 0.4478 - val_acc: 0.8056
Epoch 7/10
29488/29488 [==============================] - 70s 2ms/step - loss: 0.2396 - acc: 0.9064 - val_loss: 0.4779 - val_acc: 0.8016
Epoch 8/10
29488/29488 [==============================] - 69s 2ms/step - loss: 0.2024 - acc: 0.9228 - val_loss: 0.5058 - val_acc: 0.7978
Epoch 9/10
29488/29488 [==============================] - 69s 2ms/step - loss: 0.1579 - acc: 0.9423 - val_loss: 0.5606 - val_acc: 0.7922
Epoch 10/10
29488/29488 [==============================] - 71s 2ms/step - loss: 0.1177 - acc: 0.9579 - val_loss: 0.6562 - val_acc: 0.7854
Out[ ]:
<keras.callbacks.History at 0x7fa03da84e10>
Features Passengers Are Concerned About¶
In [ ]:
lower_reviews=new_airline_data.content.str.lower()
In [ ]:
features=['','security','check-in','facilities','people','passport','arrival','waiting','access']
x=np.array([1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0])
In [ ]:
new_airline_data['lower_reviews']=lower_reviews
In [ ]:
# Count positive/neutral/negative reviews that mention each feature keyword.
avg_pos = []
avg_neg = []
avg_net = []
for feature in features[1:]:   # skip the leading '' placeholder so the counts line up with x
    temp = new_airline_data[new_airline_data.lower_reviews.str.contains(feature)]
    avg_pos.append(len(temp[temp.sentiment == 1]))
    avg_net.append(len(temp[temp.sentiment == 0]))
    avg_neg.append(len(temp[temp.sentiment == -1]))
In [172]:
w = 0.2
fig, ax = plt.subplots()
plt.bar(x - w, avg_pos, color='green', width=w, label='avg_pos')
plt.bar(x, avg_neg, color='red', width=w, label='avg_neg')
plt.bar(x + w, avg_net, color='black', width=w, label='avg_neut')
plt.xticks(x, features[1:], rotation=45)
plt.legend(loc='best')
Out[172]:
<matplotlib.legend.Legend at 0x7f15fe1defd0>