%% main.m
% Main script of the SVD2 code.
% Prepares the corpus, runs SVD2 twice (17 and 50 induced clusters) and
% evaluates each labelling against the corresponding gold-standard tags.
%
% If you use this code, please cite:
%   Michael Lamar*, Yariv Maron*, Mark Johnson, Elie Bienenstock.
%   SVD and clustering for unsupervised POS tagging. (ACL 2010)
% For more information, see the above reference.
%
% Workspace variables left behind:
%   labels17, labels50                  - induced labels of the two runs
%   MTO_score, OTO_score, NVI_score     - scores of the LAST evaluation (50 clusters)
%   MTO_score_17, OTO_score_17, ...     - scores of the 17-cluster run
%   MTO_score_50, OTO_score_50, ...     - scores of the 50-cluster run
%
% NOTE: the diary is switched on here and only switched off on the last
% line. If any step errors out, run "diary off" manually.

% Prepare corpus
diary on
fprintf('\n')
disp(['********************** START ' datestr(now) ' **********************'])
fprintf('\n')

% Load corpus
[tokens, tags17, tags45, N_types, freqs] = load_wsj;
% tokens  - The corpus. Array of word ids.
% tags17  - The tags. Reduced tag set of Smith & Eisner.
% tags45  - The tags (Penn tag set).
% N_types - Number of different word types in the corpus.
% freqs   - Frequency of every word type (fraction).

%% Find 17 tags
% Set parameters
svd_R1   = 100;  % Number of singular vectors for first SVD
svd_R2   = 300;  % Number of singular vectors for second SVD
cl_num_1 = 500;  % Number of clusters after first clustering
cl_num_2 = 17;   % Number of clusters after second clustering

% Tag corpus with SVD2 (takes three minutes on my desktop computer)
tic
labels17 = SVD2(tokens, N_types, freqs, svd_R1, cl_num_1, svd_R2, cl_num_2);
toc

% Evaluate induced labels against gold-standard tags
[MTO_score, OTO_score, NVI_score] = eval_labels(labels17, tags17);

% Keep a copy: the 50-cluster evaluation below reuses the same variable names.
MTO_score_17 = MTO_score;
OTO_score_17 = OTO_score;
NVI_score_17 = NVI_score;

%% Find 50 tags
% Set parameters (repeated so this section does not depend on the values
% set in the previous one)
svd_R1   = 100;  % Number of singular vectors for first SVD
svd_R2   = 300;  % Number of singular vectors for second SVD
cl_num_1 = 500;  % Number of clusters after first clustering
cl_num_2 = 50;   % Number of clusters after second clustering

% Tag corpus with SVD2 (takes three minutes on my desktop computer)
tic
labels50 = SVD2(tokens, N_types, freqs, svd_R1, cl_num_1, svd_R2, cl_num_2);
toc

% Evaluate induced labels against gold-standard tags.
% Note: 50 induced clusters are scored against the 45-tag set (tags45).
[MTO_score, OTO_score, NVI_score] = eval_labels(labels50, tags45);

% Named copy of the 50-cluster scores, parallel to the *_17 variables.
MTO_score_50 = MTO_score;
OTO_score_50 = OTO_score;
NVI_score_50 = NVI_score;

fprintf('\n')
disp(['********************** END ' datestr(now) ' **********************'])
fprintf('\n')
diary off