# Sample corpus: all_docs may be any list of strings, where each
# string is treated as one document.
all_docs = [
    'The once ruddy face was puffy and pale',
    'The gray hair was straight and thin',
    # Adjacent literals are concatenated into a single string.
    'His dark brown eyes looked fixed, and he seemed '
    'to be daydreaming',
    'his figure was trim and erect',
]
# Keep the individual documents available under their short names.
a, b, c, d = all_docs
from sklearn.feature_extraction.text import TfidfVectorizer
# TfidfVectorizer is a class, so it is instantiated here with
# specific parameters and bound to the name 'vectorizer'.
vectorizer = TfidfVectorizer(
    max_df=0.65,
    min_df=1,
    stop_words=None,
    use_idf=True,
    norm=None,
)
# Calling the object's fit_transform() method on the list of
# strings (all_docs) returns the result stored in X.
X = vectorizer.fit_transform(all_docs)
# The fit_transform() method converts the list of strings into a
# sparse matrix of TF-IDF values.
# The toarray() method converts that sparse matrix into a numpy
# array, which makes it easier to inspect every value, including
# the zeros.
myarray = X.toarray()
# Print the first row of results, i.e. the TF-IDF values for the
# first document. (Indexing the string 'a' instead would only print
# its first character.)
print(myarray[0])