Rocksolid Light

Welcome to novaBBS (click a section below)

mail  files  register  newsreader  groups  login

Message-ID:  

The disks are getting full; purge a file today.


devel / comp.lang.c / Re: Allocate length of word vs fixed length

Re: Allocate length of word vs fixed length

<Iw5KI.24867$bR5.9748@fx44.iad>

  copy mid

https://www.novabbs.com/devel/article-flat.php?id=17616&group=comp.lang.c#17616

  copy link   Newsgroups: comp.lang.c
Path: i2pn2.org!i2pn.org!weretis.net!feeder8.news.weretis.net!news.uzoreto.com!feeder1.feed.usenet.farm!feed.usenet.farm!peer03.ams4!peer.am4.highwinds-media.com!peer03.iad!feed-me.highwinds-media.com!news.highwinds-media.com!fx44.iad.POSTED!not-for-mail
Subject: Re: Allocate length of word vs fixed length
Newsgroups: comp.lang.c
References: <U3iJI.61694$VU3.5811@fx46.iad>
<allocation-20210719204511@ram.dialup.fu-berlin.de>
<sd7ie7$3mj$1@gioia.aioe.org> <qNJJI.18502$0N5.3707@fx06.iad>
From: nos...@dfs.com (dfs)
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101
Thunderbird/78.11.0
MIME-Version: 1.0
In-Reply-To: <qNJJI.18502$0N5.3707@fx06.iad>
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Language: en-US
Content-Transfer-Encoding: 7bit
Lines: 449
Message-ID: <Iw5KI.24867$bR5.9748@fx44.iad>
X-Complaints-To: abuse@blocknews.net
NNTP-Posting-Date: Thu, 22 Jul 2021 03:23:20 UTC
Organization: blocknews - www.blocknews.net
Date: Wed, 21 Jul 2021 23:23:20 -0400
X-Received-Bytes: 11163
 by: dfs - Thu, 22 Jul 2021 03:23 UTC

On 7/20/21 8:22 PM, dfs wrote:
> On 7/20/21 6:15 PM, Real Troll wrote:

>> I agree this is too simplistic but we don't know what exactly is the
>> purpose of the program.
>
> A simple linear search of the array to do word search/count/filter when
> you enter letter(s).

That's what it started out as, but HGH took over and it bloated to the
450 lines below.

Give it a try, please, and see if you can break it. There are a few
TODOs in there, so it's not quite done.

Use whatever word list text file you want. Most of my testing was done
with words_alpha.txt from https://github.com/dwyl/english-words

----------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h> //used with tolower(val)
#include <sys/ioctl.h> //used with finding terminal width
#include <sys/resource.h> //used with finding terminal width
#include <unistd.h> //used with finding terminal width

#define maxlen 32

/*
 * Strip trailing whitespace and double-quote characters from str, in place.
 *
 * str must be a writable, NUL-terminated string.
 * Returns str itself so the call can be nested inside another expression.
 */
char *rtrim(char *str)
{
    size_t len = strlen(str);

    /* isspace() is only defined for values representable as unsigned char
       (or EOF), so a plain char must be converted before the call */
    while (len > 0 &&
           (isspace((unsigned char)str[len - 1]) || str[len - 1] == '"')) {
        len--;
    }
    str[len] = '\0';
    return str;
}

/* Shared timestamps: callers store a starting point in `start`;
   `stop` is overwritten by every call to elapsedtime(). */
struct timespec start, stop;

/*
 * Sample CLOCK_MONOTONIC_RAW into the global `stop` and return the time
 * that has passed since `started`, in seconds (including the fraction).
 */
double elapsedtime(struct timespec started)
{
    clock_gettime(CLOCK_MONOTONIC_RAW, &stop);

    double whole = stop.tv_sec - started.tv_sec;
    double fraction = (stop.tv_nsec - started.tv_nsec) / 1e9;

    return whole + fraction;
}

/* Width of the screen in characters. Refreshed by settcols(); the initial
   value of 50 is what remains in effect if the width is never obtained. */
int tcols = 50;

/*
 * Store the current terminal width (in characters) in tcols.
 *
 * The width is queried from stdout with TIOCGWINSZ. When the query fails
 * (for example stdout is redirected to a file or pipe) or reports a width
 * of zero, tcols keeps its previous value instead of being reset to 0.
 */
void settcols(void)
{
    struct winsize w = {0};

    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0 && w.ws_col > 0) {
        tcols = w.ws_col;
    }
}

/*
 * qsort() comparator for an array of C strings.
 * a and b each point at an array element (a char pointer); the strings they
 * refer to are ordered with strcmp().
 */
int comparechar(const void *a, const void *b)
{
    const char *left = *(const char *const *)a;
    const char *right = *(const char *const *)b;

    return strcmp(left, right);
}

/*
 * Print a separator line on stdout: linechar repeated linewidth times,
 * followed by a newline. A linewidth of zero or less prints only the newline.
 *
 * linechar is written verbatim; it is never interpreted as a printf format,
 * so a '%' inside it is printed like any other character.
 */
void printline(int linewidth, char *linechar)
{
    for (int i = 0; i < linewidth; i++) {
        fputs(linechar, stdout);
    }
    putchar('\n');
}

/*
 * Count duplicate entries in words[0..linecnt-1] and print "<n> dupes".
 *
 * The array must already be sorted so that equal words are adjacent. Every
 * word that equals its predecessor counts as one duplicate, so a run of k
 * identical words contributes k-1. Each run is counted exactly once,
 * including a run that extends to the end of the array.
 */
void countdupes(char *words[], int linecnt)
{
    int dupecnt = 0;

    for (int i = 1; i < linecnt; i++) {
        if (strcmp(words[i], words[i - 1]) == 0) {
            dupecnt++;
        }
    }
    printf("%d dupes\n", dupecnt);
}

/*
 * Placeholder for the "mode" statistic of the word list.
 * The words argument is not examined; a fixed string is always returned.
 */
char *getmode(char *words[])
{
    (void)words; /* intentionally unused */

    char *mode = "three words appear 4 times";
    return mode;
}

/* A letter (0 = 'a' ... 25 = 'z') paired with its number of occurrences. */
struct letterfreq {
    int letter;
    int count;
};

/* qsort() comparator: larger counts first, equal counts in alphabetical order. */
static int cmpletterfreq(const void *a, const void *b)
{
    const struct letterfreq *x = a;
    const struct letterfreq *y = b;

    if (x->count != y->count) {
        return (x->count < y->count) ? 1 : -1;
    }
    return x->letter - y->letter;
}

/*
 * Print a summary of the word list to stdout.
 *
 *   linecnt   number of entries in words
 *   words     the word list; the first/last/median lines and the duplicate
 *             count are only meaningful when it is sorted
 *   maxwlen   length of the longest word
 *   countarr  26 counters, number of words starting with 'a'..'z'
 *   infile    name shown in the heading
 *
 * The output consists of the word and duplicate counts, the first, last and
 * middle word, and three tables: words per length, words per first letter,
 * and letter frequencies in descending order (ties in alphabetical order).
 * Only the letters 'a'..'z' are tallied; any other byte in a word is skipped.
 * An empty list prints just the heading and the word count.
 */
void printsummary(int linecnt, char *words[], int maxwlen, int countarr[], char *infile)
{
    /* every table is laid out on the same 6 x 7 grid; unused cells stay empty */
    enum {
        GRID_ROWS = 6,
        GRID_COLS = 7,
        GRID_CELLS = GRID_ROWS * GRID_COLS,
        LETTERS = 26
    };

    settcols(); /* refresh tcols before sizing the separator lines */
    int rulewidth = (int)(tcols * .8);

    printf("\n");
    printline(rulewidth, "=");
    printf("Summary of %s\n", infile);
    printline(rulewidth, "=");
    printf("%d words\n", linecnt);
    if (linecnt <= 0) {
        /* nothing to report; words[0] below would be out of bounds */
        printline(rulewidth, "=");
        return;
    }
    countdupes(words, linecnt);
    printf("\nFirst word is '%s'\n", words[0]);
    printf("Last word is '%s'\n", words[linecnt - 1]);
    printf("Longest word is %d letters\n\n", maxwlen);
    printf("Mean = \n");
    printf("Median = %s\n", words[linecnt / 2]);
    printf("Mode = %s\n\n", getmode(words));

    /* one pass over the list gathers both the per-length counts and the
       per-letter frequencies */
    int lencnt[GRID_CELLS] = {0};
    struct letterfreq freq[LETTERS];
    for (int k = 0; k < LETTERS; k++) {
        freq[k].letter = k;
        freq[k].count = 0;
    }
    for (int i = 0; i < linecnt; i++) {
        size_t len = 0;
        for (const char *p = words[i]; *p != '\0'; p++) {
            if (*p >= 'a' && *p <= 'z') {
                freq[*p - 'a'].count++;
            }
            len++;
        }
        /* lengths beyond the grid can never be displayed, so they are not kept */
        if (len >= 1 && len <= (size_t)GRID_CELLS) {
            lencnt[len - 1]++;
        }
    }

    printf("Word counts by length\n");
    for (int cell = 0; cell < GRID_CELLS; cell++) {
        if (cell < maxwlen) {
            printf("%2d. %5d ", cell + 1, lencnt[cell]);
        }
        if (cell % GRID_COLS == GRID_COLS - 1) {
            printf("\n");
        }
    }

    printf("\nWord counts by first letter\n");
    for (int cell = 0; cell < GRID_CELLS; cell++) {
        if (cell < LETTERS) {
            printf("%c. %5d ", 'a' + cell, countarr[cell]);
        }
        if (cell % GRID_COLS == GRID_COLS - 1) {
            printf("\n");
        }
    }

    /* sorting (letter, count) pairs keeps each count attached to its letter,
       so letters with equal counts are each printed once */
    qsort(freq, LETTERS, sizeof freq[0], cmpletterfreq);

    printf("Descending frequency counts\n");
    for (int cell = 0; cell < GRID_CELLS; cell++) {
        if (cell < LETTERS) {
            printf("%c. %6d ", 'a' + freq[cell].letter, freq[cell].count);
        }
        if (cell % GRID_COLS == GRID_COLS - 1) {
            printf("\n");
        }
    }
    printline(rulewidth, "=");
}

/* Release the first n strings of words and then the array itself. */
static void freewords(char **words, int n)
{
    if (words == NULL) {
        return;
    }
    for (int i = 0; i < n; i++) {
        free(words[i]);
    }
    free(words);
}

/*
 * Interactive word-list explorer.
 *
 * Usage: <program> wordfile
 *
 * The file is read twice: once to count the non-blank lines and find the
 * longest word, once to load the words. fgets() reads at most maxlen-1
 * characters at a time, so a longer line is stored as several entries.
 * The list is then sorted and a menu loop runs until "-x" or end of input.
 *
 * Input starting with '-' is an option; it selects a search mode
 * (-l prefix, -b substring, -n length, -d dictionary lookup), toggles a
 * setting (-p, -t), prints the summary (-s) or the menu (-m), or exits (-x).
 * Any other input is the search term for the mode currently selected.
 *
 * Returns EXIT_FAILURE when the word list cannot be loaded, otherwise 0.
 */
int main(int argc, char *argv[])
{
    enum mode { MODE_NONE, MODE_PREFIX, MODE_SUBSTR, MODE_LENGTH, MODE_DICT };
    static const char *const offon[] = {"off", "on"};

    if (argc < 2) {
        fprintf(stderr, "usage: %s wordfile\n", argc > 0 ? argv[0] : "words");
        return EXIT_FAILURE;
    }
    char *filein = argv[1];

    FILE *fwords = fopen(filein, "r");
    if (fwords == NULL) {
        perror(filein);
        return EXIT_FAILURE;
    }

    //pass 1: count non-blank lines, get max word length
    char wordin[maxlen] = "";
    int lines = 0, maxwlen = 0;
    while (fgets(wordin, sizeof wordin, fwords) != NULL) {
        int wordlen = (int)strlen(rtrim(wordin));
        if (wordlen > 0) {
            if (wordlen > maxwlen) {
                maxwlen = wordlen;
            }
            lines++;
        }
    }
    if (lines == 0) {
        fprintf(stderr, "%s contains no words\n", filein);
        fclose(fwords);
        return EXIT_FAILURE;
    }

    //mem
    char **words = malloc(sizeof *words * (size_t)lines);
    if (words == NULL) {
        printf("malloc failed\n");
        fclose(fwords);
        return EXIT_FAILURE;
    }

    //pass 2: load word list into array
    rewind(fwords);
    int countarr[26] = {0};
    int loaded = 0;
    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
    /* never store more than pass 1 counted, even if the file grew meanwhile */
    while (loaded < lines && fgets(wordin, sizeof wordin, fwords) != NULL) {
        size_t len = strlen(rtrim(wordin));
        if (len == 0) {
            continue;
        }
        words[loaded] = malloc(len + 1);
        if (words[loaded] == NULL) {
            printf("malloc failed\n");
            fclose(fwords);
            freewords(words, loaded);
            return EXIT_FAILURE;
        }
        memcpy(words[loaded], wordin, len + 1);
        /* only a-z has a slot in countarr; other first characters are skipped */
        if (wordin[0] >= 'a' && wordin[0] <= 'z') {
            countarr[wordin[0] - 'a']++;
        }
        loaded++;
    }
    lines = loaded; /* the file may have shrunk between the two passes */
    printf("\nLoaded %d words in %.3f seconds\n\n", lines, elapsedtime(start));
    //close file
    fclose(fwords);
    //sort the array
    qsort(words, (size_t)lines, sizeof *words, comparechar);

    //program options
    enum mode mode = MODE_NONE;
    int prt = 0, timing = 0;
    int showmenu = 1;
    char str[32];  /* one input token; the scanf width below must stay size-1 */
    char fcmd[37]; /* "dict " + up to 31 characters + NUL */

    for (;;) {
        if (showmenu) {
            printf("\nMenu \n\n");
            printf(" -l search by start of word\n");
            printf(" -b search by substring\n");
            printf(" -n find words of length L\n");
            printf(" -s summary of word list\n");
            printf(" -d definitions\n");
            printf(" -p print results to screen (%s)\n", offon[prt]);
            printf(" -m print this menu\n");
            printf(" -t show search times (%s)\n", offon[timing]);
            printf(" -x exit program\n");
            showmenu = 0;
        }

        switch (mode) {
        case MODE_PREFIX: printf("\nEnter letters to search for: "); break;
        case MODE_SUBSTR: printf("\nEnter substring to search for: "); break;
        case MODE_LENGTH: printf("\nEnter size of word to search for: "); break;
        case MODE_DICT:   printf("\nEnter word to find definition: "); break;
        default:          printf("\nEnter -option to start: "); break;
        }
        fflush(stdout);

        //capture keyboard input; stop on end of input or read error
        if (scanf("%31s", str) != 1) {
            break;
        }

        if (str[0] == '-') {
            int opt = tolower((unsigned char)str[1]);

            if (opt == 'x') {
                break;
            }
            switch (opt) {
            case 'l': //search for letters at beginning
                mode = MODE_PREFIX;
                break;
            case 'b': //search for substring anywhere in word
                mode = MODE_SUBSTR;
                break;
            case 'n': //search for words of a size
                mode = MODE_LENGTH;
                printf("\nlook for words of size 1 to %d", maxwlen);
                break;
            case 's': //print summary of imported words
                printsummary(lines, words, maxwlen, countarr, filein);
                break;
            case 'd': //use dict search
                mode = MODE_DICT;
                break;
            case 'm': //show menu
                showmenu = 1;
                break;
            case 'p': //print word search results to screen
                prt = !prt;
                printf("\nprint is %s", offon[prt]);
                break;
            case 't': //show timing measures
                timing = !timing;
                printf("\ntiming is %s", offon[timing]);
                break;
            default: //unknown option: just prompt again
                break;
            }
            continue;
        }

        //validation
        long wanted = 0;
        if (mode == MODE_LENGTH) {
            char *end;
            wanted = strtol(str, &end, 10);
            if (end == str || *end != '\0' || wanted < 1) {
                printf("Enter a number from 1 to %d\n", maxwlen);
                continue;
            }
            if (wanted > maxwlen) {
                printf("Max = %d\n", maxwlen);
                continue;
            }
        }

        //find word in dict
        if (mode == MODE_DICT) {
            /* NOTE(review): the term is handed to the shell through system().
               Only letters, digits and '-' are let through so that it cannot
               carry shell metacharacters. */
            int safe = 1;
            for (const char *p = str; *p != '\0'; p++) {
                if (!isalnum((unsigned char)*p) && *p != '-') {
                    safe = 0;
                    break;
                }
            }
            if (!safe) {
                printf("Only letters, digits and '-' can be looked up\n");
                continue;
            }
            snprintf(fcmd, sizeof fcmd, "dict %s", str);
            if (system(fcmd) == -1) {
                perror("dict");
            }
            continue;
        }

        if (timing) {
            clock_gettime(CLOCK_MONOTONIC_RAW, &start);
        }

        //main word search routines
        //TODO: maybe print results in columns based on terminal width
        int matches = 0;
        size_t slen = strlen(str); //length of string being sought
        for (int w = 0; w < lines; w++) {
            int hit = 0;

            switch (mode) {
            case MODE_PREFIX:
                /* strncmp stops at the end of a short word instead of
                   reading slen bytes regardless of the word's length */
                hit = strncmp(words[w], str, slen) == 0;
                break;
            case MODE_SUBSTR:
                hit = strstr(words[w], str) != NULL;
                break;
            case MODE_LENGTH:
                hit = strlen(words[w]) == (size_t)wanted;
                break;
            default: //no search mode selected yet: nothing matches
                break;
            }
            if (hit) {
                if (prt) {
                    printf("%s ", words[w]);
                }
                matches++;
            }
        }

        if (timing) {
            printf("\nFound %d matches for %s in %.4f seconds\n",
                   matches, str, elapsedtime(start));
        } else {
            printf("\nFound %d matches for %s\n", matches, str);
        }
    } //end input loop

    freewords(words, lines);
    return 0;
}
---------------------------------------------------------------------------------

SubjectRepliesAuthor
o Allocate length of word vs fixed length

By: dfs on Mon, 19 Jul 2021

24dfs
server_pubkey.txt

rocksolid light 0.9.81
clearnet tor