Commit bd6ff23d authored by Leodegario Lorenzo II
Browse files

Add get lsa function

parent 3c55d10e
from .tfidf import get_tfidf
from .lsa import get_lsa
import pandas as pd
from sklearn.decomposition import TruncatedSVD
def get_lsa(bow, random_state=1, **kwargs):
    """Return LSA and its components given bow/TF-IDF data

    Fits a ``TruncatedSVD`` on `bow` and wraps the fitted outputs in
    labelled DataFrames.

    Parameters
    ----------
    bow : pandas DataFrame
        DataFrame containing bag of words or TF-IDF matrix
    random_state : int, default=1
        Random state to be used in the LSA
    **kwargs : keyword arguments
        Other keyword arguments to pass on TruncatedSVD. ``n_components``
        may be supplied here; when omitted it defaults to
        ``bow.shape[1] - 1``.

    Returns
    -------
    df_lsa : pandas DataFrame
        LSA matrix, indexed like `bow`, with one ``'SV k'`` column per
        singular vector
    lsa_components : pandas DataFrame
        Transposed SVD components: indexed by the columns of `bow`, with
        one ``'SV k'`` column per singular vector
    explained_variance_ratio : numpy array
        Explained variance ratio of each SV
    """
    # Pull n_components out of kwargs so a caller-supplied value overrides
    # the default instead of colliding with it (which would raise a
    # "multiple values for argument" TypeError).
    n_components = kwargs.pop('n_components', bow.shape[1] - 1)

    # Perform truncated SVD
    svd = TruncatedSVD(n_components=n_components,
                       random_state=random_state,
                       **kwargs)
    lsa = svd.fit_transform(bow)

    # Label columns from the fitted output so they always match the number
    # of components actually produced
    cols = [f'SV {i + 1}' for i in range(lsa.shape[1])]

    # Create results data frames
    df_lsa = pd.DataFrame(lsa, columns=cols, index=bow.index)
    lsa_components = pd.DataFrame(svd.components_, index=cols,
                                  columns=bow.columns)

    # Components are transposed so features are rows and SVs are columns
    return df_lsa, lsa_components.T, svd.explained_variance_ratio_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment