# (stray CF_HTML clipboard header from a web copy/paste -- not Python code)
# Version:0.9 StartHTML:0000000105 EndHTML:0000035477 StartFragment:0000001234 EndFragment:0000035461
# maXbox Cassandra KerasClassifier August 2019, locs=114
import string
import re, os, time, sys
import numpy, pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from keras.utils.vis_utils import plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Embedding, LSTM, Dropout, SpatialDropout1D
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.models import load_model
#sys.path.append(os.path.dirname(__file__) + '/../Aug2019/Scripts/Modules/')
"""
from gensim.models import Word2Vec
from gensim.models import Doc2Vec
from gensim.models import KeyedVectors
"""
# get the data from https://www.kaggle.com/c/titanic/data
# Windows base path to the local Titanic CSV folder (raw string keeps backslashes literal).
BASEPATH = r"C:\maxbook\maxboxpython\xampp\pitchfork-data\titanic\\"
TITANICDATA = BASEPATH+"train.csv"
# Expose the Graphviz 'dot' binary on PATH so keras plot_model can render PNG diagrams.
os.environ["PATH"]+= os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
# save list to file
def save_list(lines, filename):
    """Write *lines* to *filename*, one item per line.

    Items are joined with a single newline separator (no trailing
    newline), so a round-trip through load_doc returns the same text.
    """
    # 'with' guarantees the handle is closed even if write() raises
    # (the original opened/closed manually and had lost its indentation).
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))
# load doc into memory
def load_doc(filename):
    """Return the entire contents of text file *filename* as one string."""
    # context manager closes the file even if read() raises
    # (original used manual open/close and had lost its indentation)
    with open(filename, 'r') as file:
        return file.read()
#Function to create model, required for KerasClassifier
def create_model():
    """Build and compile the Titanic binary classifier.

    Returns a compiled Sequential net:
    6 inputs (the predictor columns) -> Dense(12, relu)
    -> Dense(10, relu) -> Dense(1, sigmoid).
    """
    clf = Sequential()
    clf.add(Dense(12, input_dim=6, activation='relu'))
    clf.add(Dense(10, activation='relu'))
    clf.add(Dense(1, activation='sigmoid'))
    # binary_crossentropy is the correct loss for a single sigmoid
    # output; the original 'mse' trains but gives weaker gradients
    # for binary classification.
    clf.compile(optimizer='adam', loss='binary_crossentropy',
                metrics=['accuracy'])
    return clf
#@main
# Script entry: load the Titanic CSV, preprocess in place, cross-validate a
# KerasClassifier, then fit/evaluate a control model on the same data.
tstart = time.time()
titanic= pandas.read_csv(TITANICDATA)
# Survival rate per (Sex, Pclass) cell, before any preprocessing.
print(titanic.groupby(['Sex','Pclass']) \
['Survived'].aggregate('mean').unstack())
# Encode Sex numerically in place: male -> 0, female -> 1.
titanic.loc[titanic['Sex']== 'male', 'Sex']= 0
titanic.loc[titanic['Sex']== 'female', 'Sex']= 1
# Impute missing ages with the column mean (in place).
titanic['Age'].fillna(titanic['Age'].mean(), inplace=True)
print(titanic.info())
#predictors are the input_dim=6 in Keras Model
predictors = ['Pclass', 'Age', 'Sex', 'SibSp', 'Parch', 'Fare']
seed = 7
numpy.random.seed(seed)  # reproducibility for numpy-side randomness
batch_size = 32
#pass the keras model to classifier
model = KerasClassifier(build_fn=create_model, epochs=150,
batch_size=batch_size, verbose=1)
# 5-fold stratified cross-validation over the six predictor columns.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
results= cross_val_score(model,titanic[predictors],titanic['Survived'],
cv=kfold)
create_model().summary()
# Needs Graphviz on PATH (added near the top of the file) to write the PNG.
plot_model(create_model(),
to_file=BASEPATH+'410_kerasmodel3.png',show_shapes=True)
print('clf result over cross_val mean: ',results.mean(),'\n')
# evaluate model on training dataset
# another control set to compare score and prediction
clfm = create_model()
# NOTE(review): this model is fit and evaluated on the SAME data, so the
# accuracy below is training accuracy, not a generalization estimate.
clfm.fit(titanic[predictors], titanic['Survived'],epochs=50,
batch_size=batch_size, verbose=2)
_,acc = clfm.evaluate(titanic[predictors],
titanic['Survived'],verbose=2)
print('Train Accuracy: %.2f' % (acc*100))
# later, evaluate model on test dataset
"""
_, acc = model.evaluate(Xtest, ytest, verbose=0)
print('Test Accuracy: %.2f \n' % (acc*100))
"""
predicts= clfm.predict(titanic[predictors], batch_size=batch_size,verbose=1)
#predicts = [numpy.argmax(i) for i in predicts]
#"predicts" is a list of decimals between [0,1] with sigmoid output.
# Round sigmoid probabilities at 0.5 to get hard 0/1 labels.
predicts= [numpy.round(i) for i in predicts]
print(metrics.confusion_matrix(titanic['Survived'], predicts,labels=[0,1]))
print(metrics.classification_report(titanic['Survived'], predicts))
# Same per-(Sex, Pclass) survival table as printed at the start.
print(titanic.groupby(['Sex','Pclass']) \
['Survived'].aggregate('mean').unstack())
# Identical aggregate written with .mean() instead of .aggregate('mean').
print(titanic.groupby(['Sex', 'Pclass'])['Survived'].mean().unstack())
print('Titanic KerasClassifier time: %0.3f secs' % (time.time()-tstart))
"""
rounded = [float(numpy.round(x)) for x in predictions]
I use the pandas apply() function and a lambda function. The apply() function extrapolates a pandas series out of whatever lambda function we implement.
https://ahmedbesbes.com/sentiment-analysis-on-twitter-using-word2vec-and-keras.html
https://ermlab.com/en/blog/nlp/polish-sentiment-analysis-using-keras-and-word2vec/
"""