This is asciioutput.c in view mode; [Download] [Up]
/* -*- c -*- */
/*
 * Webster Access, a program to use NeXT online Webster dictionary.
 * Copyright (C) 1994 Benoit Grange, ben@fizz.fdn.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#import <libc.h>
#import <streams/streams.h>

/*
 * Bits of the `modeSelect' argument taken by outputFont() and
 * convertToAscii().
 */
enum {
    OUT_ALT_CHARS = 0x1,  /* print the `output2' column of the char tables */
    OUT_DEBUG     = 0x2,  /* annotate output: fonts, modes, undecoded bytes */
    OUT_THESAURUS = 0x4   /* use the thesaurus tables instead of the dictionary ones */
};

/* Sections are the first byte in the decoded text buffer */
typedef struct {
    int tag;        /* value of the first byte of the buffer */
    char *name;     /* heading printed for that section; NULL ends the table */
} tSection;

tSection dicoSections[] = {
    { 0x00, "" },
    { 0x01, "* Abbreviations *\n" },
    { 0x03, "* Bibiographical names *\n" },
    { 0x04, "* Geographical names *\n" },
    { 0x05, "* Colleges and Universities *\n" },
    { 0x06, "* Abbreviations used in the dictionary *\n" },
    { 0x00, NULL }
};

/*
 * Modes are found in the buffer.
 *
 * When the decoder switches from one mode to another it emits, in order:
 *   leaving the old mode:  aFont, after, aFont2
 *   entering the new mode: bFont2, before, bFont
 * A zero font tag or a NULL string means "emit nothing".
 */
typedef struct {
    int tag;        /* byte value that selects this mode */
    char *name;     /* mode name (printed in debug output); NULL ends the table */
    char *before;   /* text printed when the mode is entered */
    int bFont;      /* font tag emitted after `before' */
    char *after;    /* text printed when the mode is left */
    int aFont;      /* font tag emitted before `after' */
    int bFont2;     /* font tag emitted before `before' */
    int aFont2;     /* font tag emitted after `after' */
} tMode;

tMode dicoModes[] = {
    { 0xFF, "Initial", "", 0, "", 0 },
    { 0x02, "Pronouciation", " \\", 0, "\\", 0 },
    { 0x03, "Ignore-03", "\n", 0, 0, 0 },
    { 0x04, "Genre", " ", 0x0F, "", 0x0E },
    { 0x05, "Plural", " " },
    { 0x06, "Picture expected" },
    { 0x07, "Date", "\n(", 0, ")" },
    { 0x08, "Domain", "\n<domain-tags ", 0, ">", 0 },
    { 0x09, "Ethymology", "\n[", 0, "]" },
    { 0x0B, "Definition", "\n", 0x10, "", 0 },
    { 0x0D, "Usage" },
    { 0x14, "Text", ":", 0x0E },
    { 0x00, NULL }
};

tMode thesModes[] = {
    { 0xFF, "Initial", "", 0, "", 0 },
    { 0x03, "Genre", " ", 0x0D, "\n", 0x0F },
    { 0x04, "Def", "", 0, " ", 0 },
    { 0x05, "Syn", "syn ", 0, "\n", 0, 0x10, 0x0F },
    { 0x06, "Ant", "ant ", 0, "\n", 0, 0x10, 0x0F },
    { 0x07, "Con", "con ", 0, "\n", 0, 0x10, 0x0F },
    { 0x08, "Rel", "rel ", 0, "\n", 0, 0x10, 0x0F },
    { 0x09, "Idiom", "idiom ", 0, "\n", 0, 0x10, 0x0F },
    { 0x0A, "?", " (", 0, ")\n", 0, 0, 0 },
    { 0x0B, "Compare", " compare ", 0, "", 0, 0x0D, 0x0F },
    /* Terminator: the mode scan stops on a NULL name. */
    { 0x00, NULL }
};

typedef struct {
    int tag;        /* byte value that selects this font; 0 ends the table */
    char *name;     /* font name printed in debug output; NULL ends the table */
} tFont;

tFont dicoFonts[] = {
    { 0x0E, "Normal" },
    { 0x0F, "Italics" },
    { 0x10, "Bold", },
    { 0x17, "Superscript" },
    { 0x18, "Subscript" },
    { 0x1C, "Sanssherif" },
    { 0x1E, "Smallcaps" },
    { 0x1F, "BoldItalic" },
    { 0x0, NULL },
};

tFont thesFonts[] = {
    { 0x0D, "Italics" },
    { 0x0F, "Normal" },
    { 0x10, "Bold", },
    { 0x11, "Caps", },
    /* Terminator: the font scans stop on a zero tag / NULL name. */
    { 0x0, NULL },
};

/*
 * Print the name of font `tag' as "{Name}" on `stream'.
 *
 * Nothing is printed unless the OUT_DEBUG bit of `modeSelect' is set.
 * The OUT_THESAURUS bit selects thesFonts instead of dicoFonts.
 * Unknown tags are silently ignored.
 */
void outputFont(int tag, NXStream* stream, int modeSelect)
{
    tFont *font;

    if (!(modeSelect & OUT_DEBUG))
        return;

    font = (modeSelect & OUT_THESAURUS) ? thesFonts : dicoFonts;
    for (; font->tag; font++) {
        if (font->tag == tag) {
            NXPrintf(stream, "{%s}", font->name);
            return;
        }
    }
}

/*
 * One special character: the byte `c' is rendered as `output1', or as
 * `output2' when OUT_ALT_CHARS is requested.  A NULL `output1' ends a table.
 */
typedef struct {
    unsigned char c;
    char* output1;
    char* output2;
} tChars;

// Normal Chars
tChars dicoSingleChars[] = {
    { 0x0c, "||", "||" },               /* double bar */
    { 0x11, "<e>", "<e>" },             /* inverted "e" */
    { 0x12, "'", "'" },                 /* high stress mark */
    { 0x13, ",", "," },                 /* low stress mark */
    { 0x15, "<e^->", "<e^->" },         /* long e */
    { 0x16, "<a:>", "<a:>" },           /* a umlaut */
    { 0x19, "<o^->", "<o^->" },         /* long o */
    { 0x1a, "<a^->", "<a^->" },         /* long a */
    { 0x1b, "<i^->", "<i^->" },         /* long i */
    { 0x1d, "<o.>", "<o.>" },           /* o with one . */
    { 0, NULL }
};

// Chars prefixed by 0x0A
tChars dicoAChars[] = {
    { 0x05, "=/=", "=/=" },             /* not equal to, see "inequality", NeXT wrong */
    { 0x0a, " ", " " },                 /* ? spaces? see abernathy */
    { 0x0c, "", "" },                   /* ??? Seems to mean nothing, see "embrace" */
    { 0x10, "$", "$" },                 /* dollar sign, see "run" */
    { 0x11, "`", "`" },                 /* open quote of some sort, see "Amos" */
    { 0x17, "|", "\263" },              /* dagger, "bowdlerize" */
    { 0x19, "<==>", "<==>" },           /* triple bond, acetylene */
    { 0x1a, "-", "-" },                 /* superscript "-" for exponents, see electronvolt */
    { 0x1c, "-", "-" },                 /* minus sign, see absolute value */
    { 0x21, "X", "\236" },              /* multiplication, see avogadronumber */
    { 0x23, "+/-", "\321" },            /* plus or minus, see primenumber */
    { 0x22, "-:-", "\237" },            /* "divided by" sign,
                                         * indicates frowned-on pronunciation */
    { 0x27, "<root>", "<root>" },       /* square root sign, see "complexnumber" */
    { 0x28, "'", "'" },                 /* ? Some sort of tick, see adenosine35monophosphate */
    { 0x29, "\"", "\272" },             /* inches, see "sort" */
    { 0x2c, "<doubledagger>", "\264" }, /* doubledagger */
    { 0x34, "'", "\302" },              /* accent, see "accent" */
    { 0x35, "`", "\301" },              /* accent, see "accent" */
    { 0x36, "<breve>", "\306" },        /* breve accent */
    { 0x37, "~", "\303" },              /* circomflex, see circomflex */
    { 0x38, ".", "\267" },              /* bullet, see adriamycin */
    /*
     * greek letters - I can't figure out some of the inconsistencies
     */
    { 0x3c, "<alpha>", "<alpha>" },     /* alpha, see "alpha" */
    { 0x3e, "<beta>", "\373" },         /* capital Beta, see "beta" */
    { 0x41, "<gamma>", "<gamma>" },
    { 0x42, "<Delta>", "<Delta>" },
    { 0x43, "<delta>", "<delta>" },
    { 0x44, "<kappa>", "<kappa>" },
    { 0x48, "<eta>", "<eta>" },
    { 0x49, "<phi>", "<phi>" },         /* greek phi, see "arccosecant" */
    { 0x4a, "<Eta>", "<Eta>" },
    { 0x4c, "<Iota>", "<Iota>" },
    { 0x4e, "<epsilon>", "<epsilon>" }, /* ? */
    { 0x52, "<mu>", "<mu>" },
    { 0x59, "<Omega>", "<Omega>" },
    { 0x5a, "<pi>", "<pi>" },           /* pi, see "number" */
    { 0x5c, "<rho>", "<rho>" },         /* rho, see cardioid */
    { 0x5d, "<Sigma>", "<Sigma>" },
    { 0x5e, "<sigma>", "<sigma>" },
    { 0x6b, "<omega>", "<omega>" },
    { 0x61, "<Upsilon>", "<Upsilon>" },
    { 0x62, "<upsilon>", "<upsilon>" },
    { 0x63, "<tau>", "<tau>" },
    { 0x66, "<x shaped fricative>", "<x shaped fricative>" }, /* see vernerslaw */
    { 0x6c, "<a'>", "\326" },           /* a with a ', see abaca */
    { 0x6d, "<a`>", "\325" },           /* a accent grave, see "hoopla" */
    { 0x6e, "<a^>", "\327" },           /* a with a ^, see abusimbel */
    { 0x70, "<A'>", "\202" },           /* A with a ', see altoparan */
    { 0x71, "<A`>", "\201" },           /* A with a ` see "akempis" */
    { 0x72, "<A^>", "\203" },           /* A ^, see nasserlake */
    { 0x73, "<A:>", "<A:>" },           /* A super .., see fetch */
    { 0x74, "<n~>", "\347" },           /* n tilde, see angeleno */
    { 0x75, "<c,>", "\333" },           /* c cedille, see acores */
    { 0x76, "<N~>", "<N~>" },           /* N cedille, see corunna */
    { 0x77, "<C,>", "\207" },           /* C cedille, see Catalca */
    { 0x78, "<e'>", "\335" },           /* e accent aigu, see "authority" */
    { 0x79, "<e`>", "\334" },           /* e accent grave, see aerometer */
    { 0x7a, "<o^>", "\356" },           /* o circonflex, see berneralpen */
    { 0x7b, "<u'>", "\363" },           /* u with a ', see aainel */
    { 0x7c, "<U'>", "\230" },           /* U with a ', see aiun */
    { 0x7e, "<U^>", "\231" },           /* U with a ^ see elmansra */
    { 0x7f, "<U:>", "<U:>" },           /* U with two dots, see baden */
    { 0x80, "<u`>", "\362" },           /* u with a see mohawk */
    { 0x81, "<e^>", "\336" },           /* e circonflex, see amnemachin */
    { 0x82, "<e:>", "\337" },           /* e two dots above, see aedes */
    { 0x83, "<E'>", "\211" },           /* E aigu, "blue" */
    { 0x84, "<E`>", "\210" },           /* E grave, see aiguillette */
    { 0x85, "<E^>", "\212" },           /* E circonflex, see arris */
    { 0x86, "<E:>", "\213" },           /* E two dots, avlona */
    { 0x87, "<u^>", "\364" },           /* u circonflex, acoupsr */
    { 0x88, "<u:>", "<u:>" },           /* u with two dots */
    { 0x89, "<o'>", "\355" },           /* o with a ', abalone */
    { 0x8a, "<o`>", "\354" },           /* o grave, see amati */
    { 0x8b, "<o^>", "\356" },           /* o circonflex, see abientot */
    { 0x8c, "<o:>", "\360" },           /* o two dots, see ale */
    { 0x8d, "<O'>", "\223" },           /* O', see chatham */
    { 0x8f, "<O^>", "\224" },           /* O^, see dinner */
    { 0x90, "<O:>", "\226" },           /* O two dots, see austria */
    { 0x91, "<i'>", "<i'>" },           /* i ', see acarnania */
    { 0x92, "<i`>", "<i`>" },           /* i grave, see bokchoy */
    { 0x93, "<i^>", "\344" },           /* i ^, see aine */
    { 0x94, "<i->", "<i->" },           /* long i, see abbey */
    { 0x95, "<I'>", "<I'>" },           /* I ', see cerigo */
    { 0x97, "<I^>", "\216" },           /* I ^, see devilsisland */
    { 0x98, "<I:>", "\217" },           /* I with two dots, see Bougie */
    { 0x9a, "<o,>", "<o,>" },           /* o cedille see hog */
    { 0xa1, "<u^->", "<u^->" },         /* long u "house" */
    { 0xa2, "<a.>", "<a.>" },           /* a with one . */
    { 0xa4, "<u.>", "<u.>" },           /* u with one . */
    { 0xa5, "<nj>", "<nj>" },           /* nj dipthong */
    { 0xa7, "<oe>", "<oe>" },           /* "oe" ligature */
    { 0xa8, "<ue>", "<ue>" },           /* "ue" ligature, see badenwurttemberg */
    { 0xa9, "<ue->", "<ue->" },         /* long ue , see acoupsr */
    { 0xaa, "<^y>", "<^y>" },           /* super y, see actegratuit */
    { 0xab, "<\\e>", "<\\e>" },         /* small inverted e */
    { 0xad, "<a^u>", "<a^u>" },         /* a super 'u', see aaron */
    /* NOTE(review): duplicate key 0xad; this entry can never be reached
     * because the scan stops at the first match.  Possibly another code
     * was intended -- confirm against the dictionary data. */
    { 0xad, "<a^u>", "<a^u>" },         /* a super hook, see "aceldama" */
    { 0xaf, "<a^o>", "<a^o>" },         /* a superscript open circle, see "bokmal" */
    { 0xb1, "<A^o>", "<A^o>" },         /* A super o, see ahvenanmaa */
    { 0xb2, "<cv>", "<cv>" },           /* c super 'v', see comanche */
    { 0xb3, "<e^v>", "<e^v>" },         /* e with a "v" accent, see "favor" */
    { 0xb4, "<n^v>", "<n^v>" },         /* n with a 'v', see mrianskelazne */
    { 0xb5, "<r^v>", "<r^v>" },         /* r with a v see cricetid */
    { 0xb6, "<s^v>", "<s^v>" },         /* s with a "v" accent, see "help" */
    { 0xb7, "<z^v>", "<z^v>" },         /* z super 'v', see chew */
    { 0xb8, "<C^v>", "<C^v>" },         /* C super v, see apek */
    { 0xb9, "<S^v>", "<S^v>" },         /* S super v , see koda */
    { 0xba, "<Z^v>", "<Z^v>" },         /* Z super v, see ika */
    { 0xbb, "<s,>", "<s,>" },           /* s sub , see alipaa */
    { 0xbc, "<S,>", "<S,>" },           /* S sub , see tami */
    { 0xbe, "<e,>", "<e,>" },           /* e sub backwards comma.. owicim */
    { 0xc0, "<l/>", "<l/>" },           /* ? l with a slash? see aleurone */
    { 0xc1, "<i>", "<i>" },             /* dotless i? aydn */
    { 0xc2, "<^o>", "\353" },           /* Degrees symbol, see "fall" */
    { 0xc5, "<o/>", "<o/>" },           /* o with a slash, see bilberry */
    { 0xc6, "<A^->", "<A^->" },         /* long A */
    { 0xc7, "<E^->", "<E^->" },         /* long E, "echo" */
    { 0xc8, "<I->", "<I->" },           /* long I, see aurangzeb */
    { 0xc9, "<O->", "<O->" },           /* Long O, see obadiah */
    { 0xca, "<U->", "<U->" },           /* long U see albucasis */
    { 0xcb, "<^n>", "<^n>" },           /* small superscript n, see "Bechar" */
    { 0xcc, "<O/>", "<O/>" },           /* O with a slash, see resund */
    { 0xcd, "<AE>", "<AE>" },           /* ae ligature, see "ae" */
    { 0xce, "<a~>", "<a~>" },           /* a ~ see aripuan */
    { 0xcf, "<ae>", "<ae>" },           /* "ae" ligature */
    { 0xd0, "<y'>", "<y'>" },           /* y' see comenius */
    { 0xd1, "<o^u>", "<o^u>" },         /* o with a u, see holyofholies */
    { 0xd2, "<o~>", "\225" },           /* o~, see cames */
    { 0xd3, "<I'>", "<I'>" },           /* I' see nonu */
    { 0xd4, "<S'>", "<S'>" },           /* S ' see auschwitz */
    { 0xd5, "<E,>", "<E,>" },           /* E sub backwards-comma, see auschwitz */
    { 0xd7, "<A~>", "<A~>" },           /* A~ see prncipe */
    { 0xd9, "<oe->", "<oe->" },         /* long oe ligature, austria */
    { 0xde, "<_k_>", "<_k_>" },         /* k underscore */
    { 0xdf, "<_th_>", "<_th_>" },       /* underlined "th", "father" */
    { 0xe1, "<y:>", "\375" },           /* y with two dots, see lous */
    { 0xe2, "<s'>", "<s'>" },           /* s ', see owicim */
    { 0xe3, "<x'>", "<x'>" },           /* x ', see alexander */
    { 0xe5, "<L/>", "<L/>" },           /* L with a / through it, see migyrydz */
    { 0xe6, "<y^->", "<y^->" },         /* long y "foist" */
    { 0xe8, "<u^v>", "<u^v>" },         /* u with a v, see roof */
    { 0xea, "<n'>", "<n'>" },           /* n ', see stanisawi */
    { 0xeb, "<z'>", "<z'>" },           /* z ', see nebbish */
    { 0xed, "<A^u>", "<A^u>" },         /* A super u, see adar */
    { 0xf9, "<e.>", "<e.>" },           /* e super ., see "bark" */
    { 0xfb, "<g^v>", "<g^v>" },         /* g super v, see shagreen */
    { 0xfd, "<u^u>", "<u^u>" },         /* u super u, see many */
    { 0, NULL }
};

// Chars prefixed by 0x7F
tChars dico7Chars[] = {
    { 0x00, "<O^~>", "<O^~>" },         /* O superscript ~, see "amazon" */
    { 0x01, "<o->", "<o->" },           /* long o, see "abdomen" */
    { 0x06, "<i^u>", "<i^u>" },         /* i with a hook, see "beluga" */
    { 0x07, "<t.>", "<t.>" },           /* t sub ., see "adobe" */
    { 0x08, "<T.>", "<T.>" },           /* T sub ., see "Al" */
    { 0x0a, "<h.>", "<h.>" },           /* H sub ., see "aceldama" */
    { 0x0b, "<H,>", "<H,>" },           /* H sub hook, see alijz */
    { 0x13, "<n.>", "<n.>" },           /* n super ".", see ahriman */
    { 0x15, "<sv.>", "<sv.>" },         /* s sub ., see "alcazar" in printed dict! */
    { 0x16, "<m.>", "<m.>" },           /* m super ., see "ahimsa" */
    { 0x17, "<u->", "<u->" },           /* long u? see dungaree */
    { 0x18, "<ae^->", "<ae^->" },       /* long ae, see "blue" */
    { 0x19, "<e~>", "<e~>" },           /* e with a curly accent, see "schwa" */
    { 0x1a, "<a-~>", "<a-~>" },         /* a with a ~ and a -, see "bandana" */
    { 0x1b, "<_r_>", "<_r_>" },         /* underlined r, see betel */
    { 0x1d, "<h(u)>", "<h(u)>" },       /* h with a "u" under it, "basalt", NeXT wrong */
    { 0x20, "<E^u>", "<E^u>" },         /* E with a thing on it. see "amorite" */
    { 0x21, "<dv.>", "<dv.>" },         /* d sub ".", see "alcalde" (in printed) */
    { 0x22, "<rv.>", "<rv.>" },         /* r sub ".", see "andr" (printed) */
    { 0x25, "<ue->", "<ue->" },         /* long ue, see cuvier */
    { 0x29, "<Hv.>", "<Hv.>" },         /* H sub ".", "aggeus" */
    { 0x2a, "<cents>", "<cents>" },     /* cents sign, see hoof */
    { 0x2b, "<o\">", "<o\">" },         /* o with two '', see "pengo" */
    { 0x2c, "<j^v>", "<j^v>" },         /* j with a v, see file */
    { 0x2d, "<n\\.>", "<n\\.>" },       /* n sub ., see "banyan" */
    { 0x31, "<z.>", "<z.>" },           /* z sub one ., see nizam */
    { 0x33, "<z..>", "<z..>" },         /* z sub .., see nizam */
    { 0x34, "<s^v>", "<s^v>" },         /* see hyksos */
    { 0x35, "<o)>", "<o)>" },           /* german something, see "dobereiner" */
    { 0x37, "<o,>", "<o,>" },           /* o sub (hook shaped thing), see "law" */
    { 0x38, "<e-~>", "<e-~>" },         /* e with a - and a ~, see oneida */
    { 0x3c, "<t..>", "<t..>" },         /* t sub .., see tabla */
    { 0x5f, "<v>", "<v>" },             /* v form of circumflex, see circumflex */
    { 0x60, "<i-~>", "<i-~>" },         /* long i tilde , see chintz */
    { 0x61, "<edh>", "<edh>" },         /* old english letter, see edh, NeXT wrong */
    { 0x62, "<?>", "<?>" },             /* Spanish inverted ?, see cooncan */
    { 0x63, "<r..>", "<r..>" },         /* r sub two dots, see coir */
    { 0x64, "<v>", "<v>" },             /* v accent, see hacek */
    { 0x65, "<->", "<->" },             /* macron mark, see macron */
    { 0x66, "<l.>", "<l.>" },           /* l sub dot, see milo */
    { 0x67, "<|b>", "<|b>" },           /* strange runic character see futhark */
    { 0x68, "?!", "?!" },               /* interrobang, see interrobang */
    { 0x69, "<;>", "<;>" },             /* Messy Egyptian char, see "hyksos" */
    { 0x6a, "==>", "==>" },             /* "index" character (right pointing arrow), see index */
    { 0x6b, "<u-~>", "<u-~>" },         /* u with a - and a ~, see mungbean */
    { 0x6c, "<=", "<=" },               /* less than or equal, see harmonic series */
    { 0x6d, "<i'>", "<i'>" },           /* i with a something on it, see ephah */
    { 0x6e, "<Rx>", "<Rx>" },           /* Rx symbol, see Rx */
    { 0x6f, "<paragraph-mark>", "<paragraph-mark>" }, /* see "paragraph" */
    { 0x70, "<_t_>", "<_t_>" },         /* underscore t, see "niter" */
    { 0x71, "<epsilon>'", "<epsilon>'" }, /* see smooth breathing */
    { 0x72, "<iota^>", "<iota^>" },     /* see smooth breathing */
    { 0x73, "<S.>", "<S.>" },           /* some Hebrew S sub ., see zion */
    { 0x74, "<b shaped fricative>", "<b shaped fricative>" }, /* I give up, see "vernerslaw" */
    { 0x75, "<\">", "<\">" },           /* diaeresis, see diaeresis */
    { 0x76, "<x shaped fricative>", "<x shaped fricative>" }, /* see vernerslaw */
    { 0x77, "<yogh>", "<yogh>" },       /* it's an old English letter, see "yogh" */
    { 0x78, "||", "||" },               /* parallel, see parallel */
    { 0x79, "<\\/>", "<\\/>" },         /* checkmark (see check) - look at what NeXT
                                         * uses , it's not even this good */
    { 0x7a, "<e`>", "<e`>" },           /* e grave, see gabs */
    { 0x7b, "<,>", "<,>" },             /* cedilla accent, see cedilla */
    { 0x7d, "<intersection>", "<intersection>" }, /* inverted U, see "cap", NeXT wrong */
    { 0x7f, "<paragraph>", "<paragraph>" }, /* see "dingbat" - why are there 2 of these? */
    { 0x7e, "<iron-cross>", "<iron-cross>" }, /* well that's what it looks like, see dingbat */
    { 0x80, "<rho `>", "<rho `>" },     /* see roughbreathing */
    { 0x81, "<omega `>", "<omega `>" }, /* see roughbreathing */
    { 0x82, "#", "#" },                 /* spacemark, see "spacemark", NeXT wrong */
    { 0x84, "<zeta>", "<zeta>" },       /* see roughbreathing */
    { 0, NULL }
};

/*
 * Return the entry of `table' whose code is `c', or NULL when there is
 * none.  Tables end at the first entry with a NULL `output1'.
 */
static tChars *findCharEntry(tChars *table, unsigned char c)
{
    for (; table->output1; table++) {
        if (table->c == c)
            return table;
    }
    return NULL;
}

/* Return the entry of `table' whose tag is `c', or NULL when there is none. */
static tMode *findModeEntry(tMode *table, unsigned char c)
{
    for (; table->name; table++) {
        if (table->tag == c)
            return table;
    }
    return NULL;
}

/* Return nonzero when `c' is the tag of a font listed in `table'. */
static int isFontTag(tFont *table, unsigned char c)
{
    for (; table->name; table++) {
        if (table->tag == c)
            return 1;
    }
    return 0;
}

/* Print the rendering of `entry' selected by the OUT_ALT_CHARS bit. */
static void putCharEntry(NXStream *stream, tChars *entry, int modeSelect)
{
    NXPrintf(stream, "%s",
             (modeSelect & OUT_ALT_CHARS) ? entry->output2 : entry->output1);
}

/*
 * Close the mode `*current' and open `next', emitting the closing and
 * opening decorations described above tMode.  Does nothing when `next'
 * is already the current mode.
 */
static void switchMode(tMode **current, tMode *next,
                       NXStream *stream, int modeSelect)
{
    tMode *prev = *current;

    if (prev == next)
        return;

    if (prev->aFont)
        outputFont(prev->aFont, stream, modeSelect);
    if (prev->after)
        NXPrintf(stream, "%s", prev->after);
    if (prev->aFont2)
        outputFont(prev->aFont2, stream, modeSelect);

    *current = next;
    if (modeSelect & OUT_DEBUG)
        NXPrintf(stream, "[%s]", next->name);

    if (next->bFont2)
        outputFont(next->bFont2, stream, modeSelect);
    if (next->before)
        NXPrintf(stream, "%s", next->before);
    if (next->bFont)
        outputFont(next->bFont, stream, modeSelect);
}

/*
 * Decode a dictionary or thesaurus text buffer into printable text.
 *
 *   data        the decoded text buffer; its first byte is the section tag
 *   length      number of bytes in `data'
 *   word        text substituted for every '~' in the buffer
 *               (NULL is treated as an empty string)
 *   outstream   receives a memory stream opened here with NXOpenMemory();
 *               presumably the caller is responsible for closing it
 *   modeSelect  OR of OUT_ALT_CHARS (0x1), OUT_DEBUG (0x2) and
 *               OUT_THESAURUS (0x4)
 *
 * In dictionary mode the bytes following the section tag are skipped up
 * to and including the first space.  Each remaining byte is then handled
 * by the first rule that applies: '~', printable ASCII, a 0x0A-prefixed
 * character (dictionary only), a 0x7F-prefixed character, a mode tag, a
 * font tag, a single special character.  Bytes matching none of these are
 * counted and, with OUT_DEBUG, printed as "$XX".  Output always ends with
 * two newlines.  An empty or NULL buffer produces just those newlines.
 */
void convertToAscii(const void* data, int length, const char* word,
                    NXStream** outstream, int modeSelect)
{
    const unsigned char *text = data;
    const char *headword = word ? word : "";
    int thesaurus = (modeSelect & OUT_THESAURUS) != 0;
    tMode *modes = thesaurus ? thesModes : dicoModes;
    tFont *fonts = thesaurus ? thesFonts : dicoFonts;
    tMode *currentMode = modes;     /* first entry is the "Initial" mode */
    tSection *sect = dicoSections;
    int nbZap = 0;                  /* number of bytes that could not be decoded */
    NXStream* stream = NXOpenMemory(NULL, 0, NX_WRITEONLY);

    *outstream = stream;

    /* Nothing to decode: do not touch the buffer at all. */
    if (text == NULL || length <= 0) {
        NXPutc(stream, '\n');
        NXPutc(stream, '\n');
        return;
    }

    /* The first byte selects the section heading. */
    while (sect->name) {
        if (sect->tag == *text) {
            NXPrintf(stream, "%s", sect->name);
            break;
        }
        sect++;
    }
    if ((modeSelect & OUT_DEBUG) && !sect->name)
        NXPrintf(stream, "*** Unknown Section ***\n");
    text++;
    length--;

    /* Dictionary entries: skip everything up to and including the first space. */
    if (!thesaurus) {
        while (length > 0 && *text != ' ') {
            text++;
            length--;
        }
        if (length > 0) {
            text++;
            length--;
        }
    }

    for (; length > 0; text++, length--) {
        unsigned char c = *text;
        tChars *entry;
        tMode *mode;

        /* Zero bytes are not skipped (the original skip was disabled):
         *   if (*text == 0) goto next;
         */

        /* The ~ replaces the defined word */
        if (c == '~') {
            NXPrintf(stream, "%s", headword);
            continue;
        }

        /* Print normal chars */
        if (c > 31 && c < 127) {
            NXPutc(stream, c);
            continue;
        }

        /* Two-byte sequences: a 0x0A (dictionary only) or 0x7F prefix
         * followed by a code looked up in the matching table. */
        if ((c == 0x0A && !thesaurus) || c == 0x7F) {
            tChars *table = (c == 0x0A) ? dicoAChars : dico7Chars;

            text++;
            length--;
            if (!length)
                break;          /* prefix was the last byte: drop it */

            entry = findCharEntry(table, *text);
            if (entry) {
                putCharEntry(stream, entry, modeSelect);
            } else {
                nbZap++;
                if (modeSelect & OUT_DEBUG)
                    NXPrintf(stream, "$%02X$%02X", (unsigned)c, (unsigned)*text);
            }
            continue;
        }

        mode = findModeEntry(modes, c);
        if (mode) {
            switchMode(&currentMode, mode, stream, modeSelect);
            continue;
        }

        if (isFontTag(fonts, c)) {
            outputFont(c, stream, modeSelect);
            continue;
        }

        entry = findCharEntry(dicoSingleChars, c);
        if (entry) {
            putCharEntry(stream, entry, modeSelect);
            continue;
        }

        nbZap++;
        if (modeSelect & OUT_DEBUG)
            NXPrintf(stream, "$%02X", (unsigned)c);
    }

    NXPutc(stream, '\n');
    if ((modeSelect & OUT_DEBUG) && nbZap)
        NXPrintf(stream,
                 "******************** ABOVE CONTAINS %d NON DECODED CHARS ***\n",
                 nbZap);
    NXPutc(stream, '\n');
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.