/*This works with 68000 (normal byte order) or 80n86 (reversed byte order) */ #define LEROOT 195 /*These are the B-tree root sectors*/ #define ELROOT 327 #define MAC #define MAXLEV 3 #include "rddict.h" #ifdef MAC #include #else #include #endif /*Basically, there are two sets of files - the index files, and the .dict files. The index files are read 512 bytes at a time, starting at a particular sector number. The root sectors, one for Loglan-English and one for English-Loglan are read in initially. They contain the words at the highest level of the B-tree. */ void main(short argc, char **argv) /* Determine what kind of files are present, and open them. Set the working root value and working file pointers. */ { short i,j,k; long pos; #ifdef IBM enhance = 1; // Enhance is for people with black and white only. The colour stuff doesn't show up well on such systems. settextmode(3); // This was a Borland call to prepare for colour enhancements (Mac uses styles instead of colour) window(1,1,79,25); // Something else used by the Borland compiler for user interface. if (enhance) { textattr(YELLOW,CYAN); clrscr(); } #endif lastpos = 0; // I believe this keeps track of where we were in the .dict file, for ease in reading next and previous puts("Interim Loglan On-Line Dictionary"); // The about... messages. puts ("Copyright 1992-3, the Loglan Institute Inc."); argc = ccommand(&argv); /* This part checks to see if all the files are available. At the time this was written hard disks were much smaller, and I allowed for only half of the dictionary to be loaded (LE or EL). This checks to see which files are present and sets flags for the program */ if (argc < 4 || argv[1][0] != '-') { /* If a file was specified, open it */ puts("Format is rddict [ -e|l|b dict flname] [index flname]"); exit(0); } linp=lind=einp=eind=0; if ((i=argv[1][1])=='b' || i=='l') { // b is for both, l for LE. In either case load LE files if (!(linp = fopen(argv[2],"r"))) { printf("\nCannot open %s",argv[2]); exit(0); } if (!(lind = fopen(argv[3],"rb"))) { printf("\nCannot open %s",argv[3]); exit(0); } root = LEROOT; //Keeps track of current root sector, either LE or EL inp = linp; //These flags inp and ind keep track of which dictionary is in use. ind = lind; if (i=='l') printf("\nLoglan-English Dictionary"); //Notify user which is in use } if (argv[1][1]=='b') { //These are DOS interface commands -b is for both dictionaries if (!(einp = fopen(argv[4],"r"))) { //Now load the E/L files printf("\nCannot open %s",argv[4]); exit(0); } if (!(eind = fopen(argv[5],"rb"))) { printf("\nCannot open %s",argv[5]); exit(0); } root = ELROOT; //Set up for EL and notify user. inp = einp; ind = eind; printf("\nEnglish-Loglan Dictionary"); } else if (argv[1][1]=='e') { //If it was e only, then load the EL files, setup and notify user. if (!(einp = fopen(argv[2],"r"))) { printf("\nCannot open %s",argv[2]); exit(0); } if (!(eind = fopen(argv[3],"rb"))) { printf("\nCannot open %s",argv[3]); exit(0); } root = ELROOT; inp = einp; ind = eind; printf("\nEnglish-Loglan Dictionary"); } else if (argv[1][1] != 'l'){ //Otherwise warn the user of an invalid parameter. printf("\nInvalid parameter"); exit(0); } cursec = root; //Set the current sector to the root of the active dictionary. /*Most of the above is interface stuff which will not be important for you, except for opening the files, verifying their availability and setting the parameters for the default file (In Mac it is L/E) */ /*Find and read in the root buffer for the active dictionary. This is done by multiplying the sector number by 512, finding the place in the file, and reading in 512 bytes starting at that address. */ faddr = (long)cursec*512; i = fseek(ind,faddr,0); i = fread(&rootbuf,512,1,ind); printf("\n"); /* Until a null line, read a line to wordbuf. Change dictionary if a flag, else look up the word . Now get a line from the user which will be the word to be looked up, or in the DOS version a flag for quitting, getting the next or previous words or changing the dictionary. These are buttons in the Mac interface, and also menu items Dictionary switch is with radio buttons*/ while (1) { puts("\n-->"); cursec = root; //Set the value to the root level = 0; // This is the level in the tree. Used to determine when leaf level reached gets(wordbuf); //Get the word or command that was entered. lcase(wordbuf); if (wordbuf[0] == '-') { //Signals a flag is coming. switch (wordbuf[1] ) { case 'e': //Switch dictionary to EL by root = cursec = ELROOT; //resetting the root and current sectors if (eind) ind = eind; //and the .ind and .dict file pointers. else { // if they were opened, of course puts("The EL dictionary files were not opened"); break; } inp = einp; puts ("English-Loglan Dictionary"); //Tell the user faddr = (long)cursec*512; //Read in the root sector for the EL dictionary i = fseek(ind,faddr,0); i = fread(&rootbuf,512,1,ind); break; case 'l': //Do the same for a shift to the LE dictionary root = cursec = LEROOT; if (lind) ind = lind; else { puts("The LE dictionary files were not opened"); break; } inp = linp; puts ("Loglan-English Dictionary"); faddr = (long)cursec*512; i = fseek(ind,faddr,0); i = fread(&rootbuf,512,1,ind); break; case 'n': //Read the next record from the .dict file. (A CR separates items) pos = ftell(inp); lastpos = pos; //Keep track of the last position. in this case before the read. donext(); //Reads the next item from the .dict file. break; case 'o': //This flag turns off the colour enhancements for b/w screens. enhance ^= 1; //This is an exclusive or of the enhance flag with 1 (a toggle) if (enhance) puts("Enhancement in effect"); else puts ("Enhancement cancelled"); break; case 'p': // Previous code goes here. doback(); //Read the previous record. break; case 'q': //q is normal for Mac quit, x for DOS quit. Both accepted. case 'x': fclose(inp); //In commenting this, I wonder if I left two files open fclose(ind); exit(0); default: puts ("Invalid parameter"); break; } continue; } /* Search for the word. Return a sector number if the word is not in the sector. Return -1 if further search in this sector is needed. If the next sector is the same as this sector (All leaf sectors point to 0) the word is not present. */ if (root==LEROOT) { // If we are in LE, and the user enters multiple words /* e.g. bakso madzo, the program replies with baormao, seeks, and gives the definition if present */ if (strchr(wordbuf,' ')) canWord(wordbuf); //The word-making routine puts(wordbuf); //Write the constructed word. } /* The search routine compares the word with the root-sector words. If the search word finds the next word is alphabetically higher than the test word, it takes the preceding sector number from the .ind file . If it reaches the end and is still larger, it takes the following sector number for the next test. It then loads that sector and repeats the search with newsearch, until the leaf level is reached, and no further sectors remain, or the word is found */ nextsec = search(wordbuf,&rootbuf); if (nextsec == -1) continue; else newsearch(nextsec); } } short newsearch (short nextsec) /* Given a sector newsearch searches that sector. If it is not found we continue down the tree, until no more levels are found. Each time a new sector to search is identified, it can call itself to search the new sector.*/ { short i,j,k; cursec = nextsec; // Next sector becomes the current sector. ++level; //and we go to a lower level in the B-tree. /* I am using a maximum level rather than a zero sector number to determine when we are finished, to avoid a loop as there currently is a 0 sector number containing the lowest members of the dictionary */ if (level==MAXLEV) { //We got to the bottom without finding the word. Notify the user. puts ("Word not found. Next is:"); fseek(inp,naddr,0); //Now you see why we determined naddr when we failed a match. lastpos = naddr; // Set the last position in the dictionary, and display record at naddr. donext(); return -1; } faddr = (long)cursec*512; //faddr is the address in the index, calculated as for LEROOT fseek(ind,faddr,0); fread(&nodebuf,512,1,ind); //read it into nodebuf, and use search to get the nextsector nextsec = search(wordbuf,&nodebuf); if (nextsec == -1) return -1; else newsearch(nextsec); // Recurse with the new nextsec. return -1; } short search(char *word, Bsect *node) /*Read the sector NODE. Set prev to next lower sector. Look for the word. Return from look with -1 is prev is to be searched, else an address in the dict. If the latter,read and print it This is perhaps a bit tricky to understand because of the bit manipulation, and the need to deal with either big endian or little endian storage*/ { char *ptr; short i,j,k; ptr = word; /* Firstly we look up the pointer to the sector to be returned if the word is smaller than the test word. Recall that sector numbers are stored between the words in a B-tree. The smallest sector referred to in this level is at the beginning of the sector. 10 bits are alloted for the sector numbers, and the remaining 22 bits (in a structure of two 16-bit words) are a byte address in the .dict file */ prev = node->ltr[0].pair>>6; //Shift right 6 bits to isolate the previous sector # faddr = look(ptr,node,1); //This actually does the word comparison if (faddr != -1) { // If an address was found, it returns it, else returns -1 fseek(inp,faddr,0); //Find the place on the file where faddr is (bytes from the beginning) lastpos = faddr; //Keep track of this for previous button donext(); //Get the record from the file and display it. return -1; // -1 is returned because no more seeking needs to be done. We found it. } else return prev; //prev is the next sector to be searched, as indicated above. } unsigned long look(char *ptr, Bsect *node, short no) /* Looks in an index node for the word. (Structure of index on request). Returns the dictionary address of the word if present, else -1, whereupon the next sector in the tree (prev) is searched ptr is the pointer to the word to be searched, node is the pointer to the node(sector) being searched, no is the word number in the sector to begin the search. */ { short i,nxt; if ((i=(*ptr-node->ltr[no].each[0])) < 0) { /*Here we are comparing the first letter of the word (*ptr) with the first letter in the sector. Note that a sector is divided into 256 16-bit words, which are in a record, to use Pascal terminology, that can be addressed either as word (16-bit) , or each (a two byte array). Such a record is called a 'union' in C. node->ltr[no] addresses the corresponding 16-bit word, .each tells it to use the character addressing, and [0] is the first character in the word. If the letter is smaller than the first letter in the sector*/ while (nxt=node->ltr[no].each[0]) ++no; //Scan down letters looking for a zero //then get the following word, isolate lower 6 bits, and shift left 16 bits to put it at the top of a long word. naddr = (long)(node->ltr[no+1].pair&0x3f)<<16; //Then or this word with the next word after and store both in naddr. // Note op= (as in |=) means do op on right & left hand sides and put the result in rt. x+=1 is //x=x+1 naddr |= (unsigned short)node->ltr[no+2].pair; return -1; //Word was not found, so return -1 /* You may wonder why we did the search for naddr, which is the dictionary address of the first word in this sector, as the word we are seeking to match is not in this sector. I hope it will become clear later */ } else if (i) { //i is the result of the comparison above. In this case the word is farther along if (!(nxt=node->ltr[no].each[1])) { /* You may recall the second byte in this two-byte array is a pointer to later in the sector. We are checking if such a pointer exists. If it does not exist, we start scanning down the sector looking for such a node. If one is found, its pointer (which is a word number in this sector) is put into 'no' for a recursive search. The object of this exercise is to find the next higher sector number to search, so we follow the chain until no larger words are found in this sector. Eventually we come to the end of a word (a zero byte) and excape the loop*/ while (node->ltr[no].each[0]) { if (!node->ltr[no].each[1]) ++no; else no = node->ltr[no].each[1]; } /*Now we isolate the sector number we have found. It is in the top 10 bits of the first word following the zero. Note that In this case, "prev" is actually the "following" i.e. The sector next higher in the alphabet. We also determine the address of the highest word in naddr as before*/ prev = node->ltr[no+1].pair>>6; naddr = (long)(node->ltr[no+1].pair&0x3f)<<16; naddr |= (unsigned short)node->ltr[no+2].pair; return -1; //Because the word has not yet been found. } else { //The character that was smaller had a pointer to a higher character in nxt. /* Set the next lower sector in the alphabet and look some more. This is in case the word ends up to be smaller than the word we are currently matching with. The next lower sector is in the top 10 bis of the word two before this one */ prev = node->ltr[nxt-2].pair>>6; /*We can now do a recursive search starting at the same character of the word (ptr) at position nxt in node */ return look(ptr,node,nxt); } } else { //The current character matched. If it is 0, the word has been completely matched. if (!*ptr) { /*We found it!. Get the dictionary address as we did before*/ faddr = (long)(node->ltr[no+1].pair&0x3f)<<16; faddr |= (unsigned short)node->ltr[no+2].pair; return faddr; //and return it to be looked up in the dictionary file. } /* Matching is still in progress. If we have not reached the end of the word, advance one letter in the word, and one letter in the node, and repeat the process */ else { /*Move on a letter and look again*/ ++no; ++ptr; return look(ptr,node,no); } } } void doprint(unsigned char *tptr, short i) /*To ensure the printout doesn't break words in a line. To enhance certain words as required, and fix special chars*/ { short j,k; unsigned char *ptr; ptr = tptr; k = 0; while (*ptr != '\n'&& *ptr != '\r' && k= MAXRECORD); if (*ptr=='\t') { if (i<5) while (i<5) {putchar(' '); ++i;} else if (i<10) while (i<10) {putchar(' '); ++i;} else if (i<15) while (i<15) {putchar(' '); ++i;} else if (i<20) while (i<20) {putchar(' '); ++i;} else if (i<25) while (i<25) {putchar(' '); ++i;} else {putchar(' '); ++i;} ++ptr; ++k; } else if (*ptr==0xc9) { /* Š */ for (j=0; j<3; ++j) putchar('.'); i += 3; ++ptr; ++k; } else if (*ptr==209||*ptr==208) { putchar('-'); i +=1; ++ptr; ++k; } else if (*ptr==124) { *ptr = '\t'; } else if (*ptr=='&' && *(ptr+1)==',') { ++ptr; ++ptr; k +=2; } else if (*ptr==0xB2) { /* ¾ */ putchar('\n'); i = 0; while (i<5) {putchar(' '); ++i;} ++ptr; ++k; } else if (*ptr==0xB3) { /* „ */ *ptr = '\t'; } else if (*ptr==0xD6) { /* ÷ */ #ifdef IBM if (enhance) setatr(BLACK,RED,HIGH,NO_BLINK ); eos(); #endif ++j; ++ptr; } else if (*ptr==0xC0) { /* ¿ */ #ifdef IBM if (enhance) setatr(BLACK,WHITE,LOW,NO_BLINK); eos(); #endif ++j; ++ptr; } else if (*ptr==0xC7) { /* « */ i+=2; #ifdef IBM if (enhance) setatr(BLACK,GREEN,HIGH,NO_BLINK); eos(); #endif putchar(' '); ++ptr; ++k; } else if (*ptr==0xC8) { /* » */ i += 2; #ifdef IBM if (enhance) setatr(BLACK,WHITE,LOW,NO_BLINK); eos(); #endif putchar(' '); ++ptr; ++k; } else if (*ptr==',') { ++i; #ifdef IBM setatr(BLACK,WHITE,LOW,NO_BLINK); eos(); #endif putchar(','); ++ptr; ++k; } else { putchar(*ptr++); ++k;} ++i; if (i>70&&(*ptr=='-'||*ptr==' '||*ptr=='/')) { putchar('\n'); i=0; } } } void lcase(char *c) /*Convert a string in upper or mixed case to all lower case*/ { while (*c != '\0') { *c=tolower((int)*c); ++c; } return; } void donext(void ) { short i,j,k; long pos; i = 0; #ifdef IBM if (enhance) setatr(BLACK,GREEN,HIGH,NO_BLINK); eos(); #endif repet:k = 0; while((outbuf[k++]=j=getc(inp))!='\n'&&j!='\r'&&k