Groups | Search | Server Info | Keyboard shortcuts | Login | Register [http] [https] [nntp] [nntps]


Groups > comp.lang.c > #385886

Re: "undefined behavior"?

Date 2024-06-12 18:29 -0400
Subject Re: "undefined behavior"?
Newsgroups comp.lang.c
References <666a095a$0$952$882e4bbb@reader.netnews.com> <v4d4h5$1rc9e$1@dont-email.me>
From DFS <nospam@dfs.com>
Message-ID <666a2146$0$950$882e4bbb@reader.netnews.com> (permalink)

Show all headers | View raw


On 6/12/2024 5:38 PM, David Brown wrote:
> On 12/06/2024 22:47, DFS wrote:
>> Wrote a C program to mimic the stats shown on:
>>
>> https://www.calculatorsoup.com/calculators/statistics/descriptivestatistics.php
>>
>> My code compiles and works fine - every stat matches - except for one 
>> anomaly: when using a dataset of consecutive numbers 1 to N, all 
>> values  > 40 are flagged as outliers.  Up to 40, no problem.  Random 
>> numbers dataset of any size: no problem.
>>
>> And values 41+ definitely don't meet the conditions for outliers 
>> (using the IQR * 1.5 rule).
>>
>> Very strange.
>>
>> Edit: I just noticed I didn't initialize a char:
>> before: char outliers[100];
>> after : char outliers[100] = "";
>>
>> And the problem went away.  Reset it to before and problem came back.
>>
>> Makes no sense.  What could cause the program to go FUBAR at data 
>> point 41+ only when the dataset is consecutive numbers?
>>
>> Also, why doesn't gcc just do you a solid and initialize to "" for you?
>>
> 
> It is /really/ difficult to know exactly what your problem is without 
> seeing your C code!  There may be other problems that you haven't seen yet.

The outlier section starts on line 169
=====================================================================================

//this code is hereby released to the public domain

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <time.h>

/*
  this program computes the descriptive statistics of a randomly 
generated set of N integers

  1.0 release Dec 2020
  2.0 release Jun 2024

  used the population skewness and Kurtosis formulas from:
 
https://www.calculatorsoup.com/calculators/statistics/descriptivestatistics.php
  also test the results of this code against that site

  compile: gcc -Wall prog.c -o prog -lm
  usage  : ./prog N -option (where N is 2 or higher, and option is -r or 
-c or -o)
            -r generates N random numbers
		   -c generates consecutive numbers 1 to N
		   -o generates random numbers with outliers
*/


//random ints
int randNbr(int low, int high) {
	return (low + rand() / (RAND_MAX / (high - low + 1) + 1));
}

//comparator function used with qsort
int compareint (const void * a, const void * b)
{
   if (*(int*)a > *(int*)b) return 1;
   else if (*(int*)a < *(int*)b) return -1;
   else return 0;
}


int main(int argc, char *argv[])
{
	if(argc < 3) {
		printf("Missing argument:\n");
		printf(" * enter a number greater than 2\n");
		printf(" * enter an option -r -c or -o\n");
		exit(0);
	}	
	
	
	//vars
	int i=0, lastmode=0;
	int N = atoi(argv[1]);
	int nums[N];
	
	double sumN=0.0, median=0.0, Q1=0.0, Q2=0.0, Q3=0.0, IQR=0.0;
	double stddev = 0.0, kurtosis = 0.0;
	double sqrdiffmean = 0.0, cubediffmean = 0.0, quaddiffmean = 0.0;
	double meanabsdev = 0.0, rootmeansqr = 0.0;
	char mode[100], tmp[12];
	
	//generate random dataset
	if(strcmp(argv[2],"-r") == 0) {
		srand(time(NULL));
		for(i=0;i<N;i++) { nums[i] = randNbr(1,N*3); }
		
		printf("%d Randoms:\n", N);
		printf("No commas  : ");   for(i=0;i<N;i++) { printf("%d ", nums[i]); }
		printf("\nWith commas: "); for(i=0;i<N;i++) { printf("%d,", nums[i]); }
		qsort(nums,N,sizeof(int),compareint);
		printf("\nSorted     : "); for(i=0;i<N;i++) { printf("%d ", nums[i]); }
		printf("\nSorted     : "); for(i=0;i<N;i++) { printf("%d,", nums[i]); }
	}	
	
	//generate random dataset with outliers
	if(strcmp(argv[2],"-o") == 0) {
		srand(time(NULL));
		nums[0] = 1; nums[1] = 3;
		for(i=2;i<N-2;i++) { nums[i] = randNbr(100,N*30); }	
		nums[N-2] = 1000; nums[N-1] = 2000;
		
		printf("%d Randoms with outliers:\n", N);
		printf("No commas  : ");   for(i=0;i<N;i++) { printf("%d ", nums[i]); }
		printf("\nWith commas: "); for(i=0;i<N;i++) { printf("%d,", nums[i]); }
		qsort(nums,N,sizeof(int),compareint);
		printf("\nSorted     : "); for(i=0;i<N;i++) { printf("%d ", nums[i]); }
		printf("\nSorted     : "); for(i=0;i<N;i++) { printf("%d,", nums[i]); }
	}	
	
	
	//generate consecutive numbers 1 to N
	if(strcmp(argv[2],"-c") == 0) {
		for(i=0;i<N;i++) { nums[i] = i + 1; }
		
		printf("%d Consecutive:\n", N);
		printf("No commas     : ");   for(i=0;i<N;i++) { printf("%d ", nums[i]); }
		printf("\nWith commas   : "); for(i=0;i<N;i++) { printf("%d,", nums[i]); }
	}
	
	//various
	for(i=0;i<N;i++) {sumN += nums[i];}
	double min = nums[0], max = nums[N-1];

	
	//calc descriptive stats
	double mean = sumN / (double)N;
	int ucnt = 1, umaxcnt=1;
	for(i = 0; i < N; i++)
	{
		sqrdiffmean  += pow(nums[i] - mean, 2);  // for variance and sum squares
		cubediffmean += pow(nums[i] - mean, 3);  // for skewness
		quaddiffmean += pow(nums[i] - mean, 4);  // for Kurtosis
		meanabsdev   += fabs((nums[i] - mean));  // for mean absolute deviation
		rootmeansqr  += nums[i] * nums[i];       // for root mean square
		
		//mode	
		if(ucnt == umaxcnt && lastmode != nums[i])
		{			
			sprintf(tmp,"%d ",nums[i]);	
			strcat(mode,tmp);
		}
		
		if(nums[i]-nums[i+1]!=0) {ucnt=1;} else {ucnt++;}
		
		if(ucnt>umaxcnt)
		{
			umaxcnt=ucnt;
			memset(mode, '\0', sizeof(mode));
			sprintf(tmp, "%d ", nums[i]);			
			strcat(mode, tmp);
			lastmode = nums[i];
		}
	}
	
	
	// median and quartiles
	// quartiles divide sorted dataset into four sections
	// Q1 = median of values less than Q2
	// Q2 = median of the data set
	// Q3 = median of values greater than Q2
	if(N % 2 == 0) {
		Q2 = median = (nums[(N/2)-1] + nums[N/2]) / 2.0;
		i = N/2;
		if(i % 2 == 0) {
			Q1 = (nums[(i/2)-1] + nums[i/2]) / 2.0;
			Q3 = (nums[i + ((i-1)/2)] + nums[i+(i/2)]) / 2.0;
		}	
		if(i % 2 != 0) {
			Q1 = nums[(i-1)/2];
			Q3 = nums[i + ((i-1)/2)];
		}	
	}

	if(N % 2 != 0) {
		Q2 = median = nums[(N-1)/2];
		i = (N-1)/2;
		if(i % 2 == 0) {
			Q1 = (nums[(i/2)-1] + nums[i/2]) / 2.0;
			Q3 = (nums[i + (i/2)] + nums[i + (i/2) + 1]) / 2.0;
		}
		if(i % 2 != 0) {
			Q1 = nums[(i-1)/2];
			Q3 = nums[i + ((i+1)/2)];
		}
	}
	

	
	// outliers: below Q1−1.5xIQR, or above Q3+1.5xIQR
	IQR = Q3 - Q1;
	char outliers[200]="", temp[10]="";
	if (N > 3) {
		
		//range for outliers
		double lo = Q1 - (1.5 * IQR);
		double hi = Q3 + (1.5 * IQR);
		
		//no outliers
		if ( min > lo && max < hi) {
			strcat(outliers,"none      (using IQR * 1.5 rule)");
		}

		//at least one outlier
		if ( min < lo || max > hi) {		
			for(i = 0; i < N; i++) {
				double val = (double)nums[i];
				if(val < lo || val > hi) {
					sprintf(temp,"%.0f ",val);
					temp[strlen(temp)] = '\0';
					strcat(outliers,temp);
				}
			}	
			strcat(outliers," (using IQR * 1.5 rule)");
		}	
		outliers[strlen(outliers)] = '\0';
	}
	
	
	stddev   = sqrt(sqrdiffmean/N);
	kurtosis = quaddiffmean / (N * pow(sqrt(sqrdiffmean/N),4));
	

	//output
	printf("\n--------------------------------------------------------------\n");
	printf("Minimum            = %.0f\n", min);
	printf("Maximum            = %.0f\n", max);
	printf("Range              = %.0f\n", max - min);
	printf("Size N             = %d\n"  , N);
	printf("Sum  N             = %.0f\n", sumN);
	printf("Mean μ             = %.2f\n", mean);
	printf("Median             = %.1f\n", median);
	if(umaxcnt > 1) {
	printf("Mode(s)            = %s (%d occurrences ea)\n", mode,umaxcnt);}
	if(umaxcnt < 2) {
	printf("Mode(s)            = na (no repeating values)\n");}
	printf("Std Dev  σ         = %.4f\n", stddev);
	printf("Variance σ^2       = %.4f\n", sqrdiffmean/N);
	printf("Mid Range          = %.1f\n", (max + min)/2);
	printf("Quartiles");
	if(N > 3) {printf("       Q1 = %.1f\n", Q1);}
	if(N < 4) {printf("       Q1 = na\n");}
	printf("                Q2 = %.1f      (median)\n", Q2);
	if(N > 3) {printf("                Q3 = %.1f\n", Q3);}
	if(N < 4) {printf("                Q3 = na\n");}
	printf("IQR                = %.1f      (interquartile range)\n", IQR);
	if(N > 3) {printf("Outliers           = %s\n", outliers);}
	if(N < 4) {printf("Outliers           = na\n");}
	printf("Sum Squares SS     = %.2f\n", sqrdiffmean);
	printf("MAD                = %.4f    (mean absolute deviation)\n", 
meanabsdev / N);
	printf("Root Mean Sqr      = %.4f\n", sqrt(rootmeansqr / N));
	printf("Std Error Mean     = %.4f\n", stddev / sqrt(N));
	printf("Skewness  γ1       = %.4f\n", cubediffmean / (N * 
pow(sqrt(sqrdiffmean/N),3)));
	printf("Kurtosis  β2       = %.4f\n", kurtosis);
	printf("Kurtosis Excess α4 = %.4f\n", kurtosis - 3);
	printf("CV                 = %.6f  (coefficient of variation\n", 
sqrt(sqrdiffmean/N) / mean);
	printf("RSD                = %.4f%%  (relative std deviation)\n", 100 * 
(sqrt(sqrdiffmean/N) / mean));
	printf("--------------------------------------------------------------\n");
	printf("Check results against\n");
	printf("https://www.calculatorsoup.com/calculators/statistics/descriptivestatistics.php");
	printf("\n\n");

	return(0);
}


=====================================================================================



> Non-static local variables without initialisers have "indeterminate" 
> value if there is no initialiser.  Trying to use these "indeterminate" 
> values is undefined behaviour - you have absolutely no control over what 
> might happen.  Any particular behaviour you see is done to luck from the 
> rest of the code and what happened to be in memory at the time.

In 2024 that's surprising.  I can't be the only one to forget to 
initialize a char[] variable.



> There is no automatic initialisation of non-static local variables, 
> because that would often be inefficient.  

It would've saved me half an hour of frustration.

Now I'm getting 'stack smashing detected' errors (after the program runs 
correctly) when using datasets of consecutive numbers.

hmmmm 2 issues in a row using consecutives - that's a clue!



> The best way to avoid errors 
> like yours, IMHO, is not to declare such variables until you have data 
> to put in them - thus you always have a sensible initialiser of real 
> data.  Occasionally that is not practical, but it works in most cases.

Data is definitely going in them: either the value 'none' or a list of 
the outliers and some text.



> For a data array, zero initialisation is common.  Typically you do this 
> with :
> 
>      int xs[100] = { 0 };
> 
> That puts the explicit 0 in the first element of xs, and then the rest 
> of the array is cleared with zeros.

> I recommend never using "char" as a type unless you really mean a  > character, limited to 7-bit ASCII.  So if your "outliers" array really
> is an array of such characters, "char" is fine.  If it is intended to be 
> numbers and for some reason you specifically want 8-bit values, use 
> "uint8_t" or "int8_t", and initialise with { 0 }.

I did mean characters, limited to: 0-9a-zA-Z()

I think I'm using the char variable correctly.
  sprintf(tempchar,"%d ",outlier);
  strcat(char,tempchar);


> A major lesson here is to learn how to use your tools.  C is not a 
> forgiving language.  Make use of all the help your tools can give you - 
> enable warnings here.  "gcc -Wall" enables a range of common warnings 
> with few false positives in normal well-written code, including ones 
> that check for attempts to read uninitialised data.  

I always use -Wall, and I was using it here.


"-Wextra" enables a
> slew of extra warnings.  Some of these will annoy people and trigger on 
> code they find reasonable, while most are good choices for a lot of code 
> - but personal preference varies significantly.  And remember to enable 
> optimisation, since it makes the static checking more powerful.

Just did this:
gcc -Wall -Wextra -O3 mmv2.c -o mmv2 -lm

and no warnings or errors at all.

But: it now aborts near the front when using consecutive data points 
(but not randoms).

*** buffer overflow detected ***: terminated
Aborted

I'm actually happy about that.  I should be able to find and fix it.



> If you /really/ want gcc to zero out such local data automatically, use 
> "-ftrivial-auto-var-init=zero".  But it is much better to use warnings 
> and write correct code - options like that one are an addition to 
> well-checked code for paranoid software in security-critical contexts.


Great answer!   I can always count on D Brown for excellent advice. 
Thank you.

Back to comp.lang.c | Previous | NextPrevious in thread | Next in thread | Find similar


Thread

"undefined behavior"? DFS <nospam@dfs.com> - 2024-06-12 16:47 -0400
  Re: "undefined behavior"? Barry Schwarz <schwarzb@delq.com> - 2024-06-12 14:30 -0700
    Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-12 17:53 -0400
      Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-12 15:30 -0700
        Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-12 19:07 -0400
          Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-12 17:33 -0700
            Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-13 05:47 +0100
              Re: "undefined behavior"? scott@slp53.sl.home (Scott Lurndal) - 2024-06-13 15:39 +0000
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-13 18:08 +0100
                Re: "undefined behavior"? bart <bc@freeuk.com> - 2024-06-13 19:01 +0100
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-13 19:54 +0100
                Re: "undefined behavior"? "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-13 12:34 -0700
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 00:32 +0100
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-14 00:55 +0100
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 02:48 +0100
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-14 12:44 +0100
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 15:30 +0100
                Re: "undefined behavior"? Richard Harnden <richard.nospam@gmail.invalid> - 2024-06-14 16:32 +0100
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 19:06 +0100
                Re: "undefined behavior"? bart <bc@freeuk.com> - 2024-06-14 19:31 +0100
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 20:13 +0100
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-14 22:29 +0100
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 23:35 +0100
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-15 00:14 +0100
                Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-15 20:57 +0200
                Re: "undefined behavior"? Richard Harnden <richard.nospam@gmail.invalid> - 2024-06-15 20:27 +0100
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-15 23:13 +0100
                Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-16 12:53 +0200
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-16 14:44 +0100
                Re: "undefined behavior"? "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-14 11:49 -0700
                Re: "undefined behavior"? Ben Bacarisse <ben@bsb.me.uk> - 2024-06-14 22:32 +0100
                Re: "undefined behavior"? "Chris M. Thomasson" <chris.m.thomasson.1@gmail.com> - 2024-06-15 00:56 -0700
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-13 15:58 -0700
                Re: "undefined behavior"? bart <bc@freeuk.com> - 2024-06-14 02:18 +0100
                Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-14 19:08 +0200
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-14 12:34 -0700
                Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-15 22:13 +0200
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-14 13:43 -0700
              Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-13 14:47 -0700
                Re: "undefined behavior"? Malcolm McLean <malcolm.arthur.mclean@gmail.com> - 2024-06-14 00:41 +0100
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-13 17:09 -0700
  Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-12 23:38 +0200
    Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-12 15:18 -0700
      Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-13 14:42 +0200
        Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-13 16:39 -0700
      Re: "undefined behavior"? Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-18 17:23 -0700
        Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-18 17:42 -0700
          Re: "undefined behavior"? Tim Rentsch <tr.17687@z991.linuxsc.com> - 2024-06-22 09:28 -0700
    Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-12 18:29 -0400
      Re: "undefined behavior"? Ike Naar <ike@sdf.org> - 2024-06-13 07:25 +0000
        Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-13 11:13 -0400
          Re: "undefined behavior"? scott@slp53.sl.home (Scott Lurndal) - 2024-06-13 15:40 +0000
          Re: "undefined behavior"? Lew Pitcher <lew.pitcher@digitalfreehold.ca> - 2024-06-13 15:49 +0000
            Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-13 13:05 -0400
      Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-13 15:15 +0200
        Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-13 16:47 -0700
          Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-14 19:13 +0200
  Re: "undefined behavior"? Janis Papanagnou <janis_papanagnou+ng@hotmail.com> - 2024-06-12 23:38 +0200
    Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-12 15:22 -0700
      Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-12 18:34 -0400
        Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-13 15:21 +0200
          Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-13 10:38 -0400
            Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-14 19:18 +0200
              Re: "undefined behavior"? scott@slp53.sl.home (Scott Lurndal) - 2024-06-14 17:36 +0000
                Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-15 22:15 +0200
              Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-14 19:05 -0400
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-14 18:39 -0700
                Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-14 23:49 -0400
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-14 20:56 -0700
                Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-15 00:45 -0400
                Re: "undefined behavior"? Janis Papanagnou <janis_papanagnou+ng@hotmail.com> - 2024-06-15 07:03 +0200
                Re: "undefined behavior"? DFS <nospam@dfs.com> - 2024-06-15 07:39 -0400
                Re: "undefined behavior"? James Kuyper <jameskuyper@alumni.caltech.edu> - 2024-06-15 01:05 -0400
                Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-14 22:20 -0700
                Re: "undefined behavior"? bart <bc@freeuk.com> - 2024-06-15 09:37 +0100
                Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-15 22:22 +0200
      Re: "undefined behavior"? Janis Papanagnou <janis_papanagnou+ng@hotmail.com> - 2024-06-13 02:19 +0200
        Re: "undefined behavior"? David Brown <david.brown@hesbynett.no> - 2024-06-13 15:28 +0200
  Re: "undefined behavior"? Keith Thompson <Keith.S.Thompson+u@gmail.com> - 2024-06-12 14:57 -0700
  Re: "undefined behavior"? bart <bc@freeuk.com> - 2024-06-13 10:43 +0100
  Re: "undefined behavior"? Bonita Montero <Bonita.Montero@gmail.com> - 2024-06-13 11:45 +0200

csiph-web