function [theResult, duplicate_S] = average_and_remove_duplicates_aoml (output, outfile)
% AVERAGE_AND_REMOVE_DUPLICATES_AOML  Average duplicate salinity samples and
% write the de-duplicated table to a text file.
%
%   [theResult, duplicate_S] = average_and_remove_duplicates_aoml(output, outfile)
%
% Input:
%   output  = seven column numeric array. The columns this routine reads and
%             writes are:
%               column 5 : 2 * conductivity ratio
%               column 6 : salinity
%               column 7 : quality flag
%             Columns 1-4 are passed through untouched; according to the
%             header line written below they are Station, Cast, Niskin and
%             Sample_Bottle (NOTE(review): confirm against the caller).
%   outfile = name of the text file to write.
%
% Duplicate rows are located by find_repeats2 (defined elsewhere), which is
% expected to return one row per duplicate pair: column 1 is the row index
% of the first sample and column 2 the row index of its repeat.
%
% A pair is "good" when its salinity difference lies within 2 standard
% deviations of the median difference. The median and standard deviation
% are re-estimated from the surviving pairs over three passes so that gross
% outliers do not inflate the acceptance band.
%
% For good pairs, the first row receives the averaged ratio and salinity and
% its quality flag is set to 6. The second row of EVERY pair (good or not)
% is then removed.
%
% Output:
%   theResult   = the output array with duplicates averaged/removed.
%   duplicate_S = salinity values of each duplicate pair (same size as the
%                 index array returned by find_repeats2), or [] when there
%                 are no duplicates.
%   outfile     = tab-separated text file holding theResult.
%   salts_duplicates.eps = plot of the duplicate differences (written to
%                 the current directory when duplicates exist).
%
% version 1 called average_and_remove_duplicates for clivar with columns of
%           output different
% version 2 called average_and_remove_duplicates_aoml for the format as
%           described above.

N_SIGMA  = 2;   % half-width of the acceptance band, in standard deviations
N_PASSES = 3;   % number of outlier-trimming passes
AVG_FLAG = 6;   % quality flag assigned to averaged duplicates

% Only the base name is needed (for the plot title). Requesting just two
% outputs keeps this working on releases where fileparts no longer
% supplies a fourth "version" output.
[unused_path, this_name] = fileparts(outfile); %#ok<ASGLU>

duplicate_S = [];
index_dups  = find_repeats2 (output);

if isempty(index_dups)
    % Pass the file name as data so that backslashes or percent signs in
    % the path are not interpreted as format directives.
    fprintf(1, '\nNo Duplicates Found. Writing %s\n\n', outfile);
else
    % Gather salinity and ratio for every member of every duplicate group;
    % row II of each array corresponds to row II of index_dups.
    duplicate_S     = reshape(output(index_dups, 6), size(index_dups));
    duplicate_ratio = reshape(output(index_dups, 5), size(index_dups));

    %
    % Identify good duplicates
    %
    % Difference between the two members of each pair, as a row vector.
    diff_dup_S = diff(duplicate_S, 1, 2)';

    % Iteratively trim: start from all pairs, then re-estimate the centre
    % and spread from the pairs that survived the previous pass.
    Index_good_dups = 1:numel(diff_dup_S);
    for pass = 1:N_PASSES
        centre = median(diff_dup_S(Index_good_dups));
        spread = std(diff_dup_S(Index_good_dups));
        Index_good_dups = find(abs(diff_dup_S - centre) <= N_SIGMA*spread);
    end

    % Statistics of the final good set, used for the plot annotations.
    centre = median(diff_dup_S(Index_good_dups));
    spread = std(diff_dup_S(Index_good_dups));

    %
    % Plot differences together with the acceptance band
    %
    figure;
    plot(diff_dup_S,'+')
    hold on;
    xlimits = get(gca,'xlim');
    plot(xlimits, [centre centre], 'k--')
    plot(xlimits, [centre + N_SIGMA*spread, centre + N_SIGMA*spread], 'r:')
    plot(xlimits, [centre - N_SIGMA*spread, centre - N_SIGMA*spread], 'r:')
    ylimits = get(gca,'ylim');
    text(xlimits(1) + 0.1*diff(xlimits), ylimits(2) - 0.1*diff(ylimits), ...
        ['Median = ' num2str(centre,5) ' +/- ' num2str(spread,5)])
    title (['Duplicate Salinity from ' upper(strrep(this_name,'_',' '))])
    print -depsc2 salts_duplicates.eps

    %
    % Average good duplicates
    %
    % meanmiss is defined elsewhere; presumably a column-wise mean that
    % ignores missing values -- TODO confirm.
    avg_S     = meanmiss(duplicate_S')';
    avg_ratio = meanmiss(duplicate_ratio')';

    % The averaged values overwrite the FIRST row of each good pair.
    first_rows = index_dups(Index_good_dups, 1);
    good_ratio = avg_ratio(Index_good_dups);
    good_S     = avg_S(Index_good_dups);
    output(first_rows, 5) = good_ratio(:);
    output(first_rows, 6) = good_S(:);
    output(first_rows, 7) = AVG_FLAG;

    %
    % Remove all the duplicates (second member of every pair, including
    % pairs that were rejected as outliers and therefore not averaged)
    %
    output(index_dups(:,2),:) = [];
end

fid = fopen(outfile,'wt');
if fid == -1
    error('average_and_remove_duplicates_aoml:fileOpen', ...
        'Unable to open "%s" for writing.', outfile);
end

% '%%' emits a literal percent sign so the header is written as a comment
% line rather than being parsed as a conversion specifier.
fprintf(fid,'%%Station\t Cast\t Niskin\t Sample_Bottle\t 2*Cond. Ratio Corrected Salinity\n');
% One row of output per line; fprintf consumes output' column by column.
fprintf(fid,'%d\t%d\t%d\t%d\t%1.5f\t%2.5f\t%d\n',output');
fclose(fid);

theResult = output;
return;