/* This is asciioutput.c in view mode; [Download] [Up] */
/* -*- c -*- */
/*
Webster Access, a program to use NeXT online Webster dictionary.
Copyright (C) 1994 Benoit Grange, ben@fizz.fdn.org
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#import <libc.h>
#import <streams/streams.h>
/*
 * Section descriptor.  The section tag is the first byte of the decoded
 * text buffer; it is looked up in a table of these records to find the
 * heading to print.
 */
typedef struct {
    int   tag;    /* value compared against the first byte of the buffer */
    char *name;   /* heading printed for the section; NULL ends a table  */
} tSection;
/*
 * Sections of the dictionary, keyed by the first byte of a decoded entry.
 * The table is scanned linearly and ends at the record whose name is NULL.
 * Tag 0x00 maps to an empty heading, i.e. nothing is printed for it.
 */
tSection dicoSections[] = {
    { 0x00, ""                                           },
    { 0x01, "* Abbreviations *\n"                        },
    { 0x03, "* Bibiographical names *\n"                 },
    { 0x04, "* Geographical names *\n"                   },
    { 0x05, "* Colleges and Universities *\n"            },
    { 0x06, "* Abbreviations used in the dictionary *\n" },
    { 0x00, NULL                                         }   /* end of table */
};
/*
 * Mode descriptor.  Mode tags are control bytes found inside the decoded
 * buffer; each one switches the converter to a new mode and makes it emit
 * the decorations described here.
 */
typedef struct {
    int   tag;      /* control byte that selects this mode                    */
    char *name;     /* mode name (shown in debug output); NULL ends a table   */
    char *before;   /* text printed when the mode is entered, or NULL         */
    int   bFont;    /* font tag emitted after `before`, 0 for none            */
    char *after;    /* text printed when the mode is left, or NULL            */
    int   aFont;    /* font tag emitted ahead of `after`, 0 for none          */
    int   bFont2;   /* second entry font, consulted ahead of `before`         */
    int   aFont2;   /* second exit font, consulted once `after` is printed    */
} tMode;
/*
 * Modes used when decoding dictionary entries.  The first record doubles
 * as the initial mode of the converter.  Every record is written out in
 * full; a NULL string or a 0 font means "emit nothing".  The table ends
 * at the record whose name is NULL.
 */
tMode dicoModes[] = {
    /* tag   name                before              bFont  after   aFont  bFont2 aFont2 */
    { 0xFF, "Initial",          "",                 0,     "",     0,     0,     0 },
    { 0x02, "Pronouciation",    " \\",              0,     "\\",   0,     0,     0 },
    { 0x03, "Ignore-03",        "\n",               0,     NULL,   0,     0,     0 },
    { 0x04, "Genre",            " ",                0x0F,  "",     0x0E,  0,     0 },
    { 0x05, "Plural",           " ",                0,     NULL,   0,     0,     0 },
    { 0x06, "Picture expected", NULL,               0,     NULL,   0,     0,     0 },
    { 0x07, "Date",             "\n(",              0,     ")",    0,     0,     0 },
    { 0x08, "Domain",           "\n<domain-tags ",  0,     ">",    0,     0,     0 },
    { 0x09, "Ethymology",       "\n[",              0,     "]",    0,     0,     0 },
    { 0x0B, "Definition",       "\n",               0x10,  "",     0,     0,     0 },
    { 0x0D, "Usage",            NULL,               0,     NULL,   0,     0,     0 },
    { 0x14, "Text",             ":",                0x0E,  NULL,   0,     0,     0 },
    { 0x00, NULL,               NULL,               0,     NULL,   0,     0,     0 }  /* end of table */
};
/*
 * Modes used when decoding thesaurus entries.  The first record doubles
 * as the initial mode of the converter.
 *
 * The table is scanned until a record with a NULL name is reached, so it
 * MUST end with such a record; without it a byte that matches no mode
 * makes the scan run past the end of the array.
 */
tMode thesModes[] =
{
    /* tag   name       before        bFont  after   aFont  bFont2 aFont2 */
    { 0xFF, "Initial", "",           0,     "",     0,     0,     0    },
    { 0x03, "Genre",   " ",          0x0D,  "\n",   0x0F,  0,     0    },
    { 0x04, "Def",     "",           0,     " ",    0,     0,     0    },
    { 0x05, "Syn",     "syn ",       0,     "\n",   0,     0x10,  0x0F },
    { 0x06, "Ant",     "ant ",       0,     "\n",   0,     0x10,  0x0F },
    { 0x07, "Con",     "con ",       0,     "\n",   0,     0x10,  0x0F },
    { 0x08, "Rel",     "rel ",       0,     "\n",   0,     0x10,  0x0F },
    { 0x09, "Idiom",   "idiom ",     0,     "\n",   0,     0x10,  0x0F },
    { 0x0A, "?",       " (",         0,     ")\n",  0,     0,     0    },
    { 0x0B, "Compare", " compare ",  0,     "",     0,     0x0D,  0x0F },
    { 0x00, NULL,      NULL,         0,     NULL,   0,     0,     0    }  /* end of table */
};
/*
 * Font descriptor: associates a font-change control byte with a
 * human-readable font name.
 */
typedef struct {
    int   tag;    /* control byte that selects the font; 0 ends a table */
    char *name;   /* font name; NULL ends a table                       */
} tFont;
/*
 * Font-change bytes recognised in dictionary entries.  The last record
 * (tag 0, NULL name) terminates the table for both kinds of scan used
 * in this file: by tag and by name.
 */
tFont dicoFonts[] = {
    { 0x0E, "Normal"      },
    { 0x0F, "Italics"     },
    { 0x10, "Bold"        },
    { 0x17, "Superscript" },
    { 0x18, "Subscript"   },
    { 0x1C, "Sanssherif"  },
    { 0x1E, "Smallcaps"   },
    { 0x1F, "BoldItalic"  },
    { 0x0,  NULL          }   /* end of table */
};
/*
 * Font-change bytes recognised in thesaurus entries.
 *
 * The table is scanned both until a zero tag and until a NULL name, so
 * it MUST end with a { 0, NULL } record; without it a byte that matches
 * no font makes the scan run past the end of the array.
 */
tFont thesFonts[] = {
    { 0x0D, "Italics" },
    { 0x0F, "Normal"  },
    { 0x10, "Bold"    },
    { 0x11, "Caps"    },
    { 0x0,  NULL      }   /* end of table */
};
/*
 * Write the name of font `tag` to `stream` as "{name}".
 *
 * Nothing is written unless bit 0x2 of modeSelect is set.  Bit 0x4 of
 * modeSelect selects the thesaurus font table instead of the dictionary
 * one.  A tag that is not in the selected table is silently ignored.
 */
void outputFont(int tag, NXStream* stream, int modeSelect)
{
    tFont *entry;

    if ((modeSelect & 2) == 0)
        return;

    /* The tables end at the record whose tag is 0. */
    for (entry = (modeSelect & 0x4) ? thesFonts : dicoFonts;
         entry->tag != 0;
         entry++) {
        if (entry->tag != tag)
            continue;
        NXPrintf(stream, "{%s}", entry->name);
        return;
    }
}
/*
 * Special-character descriptor: one input byte and its two alternative
 * printable renderings.
 */
typedef struct {
    unsigned char c;         /* byte value found in the buffer               */
    char*         output1;   /* rendering used when modeSelect bit 0x1 is 0  */
    char*         output2;   /* rendering used when modeSelect bit 0x1 is 1  */
} tChars;
/*
 * Special characters encoded as a single byte (no prefix).
 * The table ends at the record whose byte value is 0.
 */
tChars dicoSingleChars[] =
{
    { 0x0c, "||",    "||"    },   /* double bar */
    { 0x11, "<e>",   "<e>"   },   /* inverted "e" */
    { 0x12, "'",     "'"     },   /* high stress mark */
    { 0x13, ",",     ","     },   /* low stress mark */
    { 0x15, "<e^->", "<e^->" },   /* long e */
    { 0x16, "<a:>",  "<a:>"  },   /* a umlaut */
    { 0x19, "<o^->", "<o^->" },   /* long o */
    { 0x1a, "<a^->", "<a^->" },   /* long a */
    { 0x1b, "<i^->", "<i^->" },   /* long i */
    { 0x1d, "<o.>",  "<o.>"  },   /* o with one . */
    { 0,    NULL,    NULL    }    /* end of table */
};
/*
 * Special characters encoded as two bytes: the prefix 0x0A followed by
 * the byte listed here.  output1 is the plain rendering, output2 the
 * alternative one (several use octal escapes for 8-bit character codes).
 * The table is scanned in order and ends at the record whose output1 is
 * NULL; when a byte value is listed twice only the first record is used.
 */
tChars dicoAChars[] = {
    { 0x05, "=/=", "=/=" },                   /* not equal to, see "inequality", NeXT wrong */
    { 0x0a, " ", " " },                       /* ? spaces? see abernathy */
    { 0x0c, "", "" },                         /* ??? Seems to mean nothing, see "embrace" */
    { 0x10, "$", "$" },                       /* dollar sign, see "run" */
    { 0x11, "`", "`" },                       /* open quote of some sort, see "Amos" */
    { 0x17, "|", "\263" },                    /* dagger, "bowdlerize" */
    { 0x19, "<==>", "<==>" },                 /* triple bond, acetylene */
    { 0x1a, "-", "-" },                       /* superscript "-" for exponents, see electronvolt */
    { 0x1c, "-", "-" },                       /* minus sign, see absolute value */
    { 0x21, "X", "\236" },                    /* multiplication, see avogadronumber */
    { 0x23, "+/-", "\321" },                  /* plus or minus, see primenumber */
    { 0x22, "-:-", "\237" },                  /* "divided by" sign, * indicates frowned-on pronunciation */
    { 0x27, "<root>", "<root>" },             /* square root sign, see "complexnumber" */
    { 0x28, "'", "'" },                       /* ? Some sort of tick, see adenosine35monophosphate */
    { 0x29, "\"", "\272" },                   /* inches, see "sort" */
    { 0x2c, "<doubledagger>", "\264" },       /* doubledagger */
    { 0x34, "'", "\302" },                    /* accent, see "accent" */
    { 0x35, "`", "\301" },                    /* accent, see "accent" */
    { 0x36, "<breve>", "\306" },              /* breve accent */
    { 0x37, "~", "\303" },                    /* circomflex, see circomflex */
    { 0x38, ".", "\267" },                    /* bullet, see adriamycin */
    /*
     * greek letters - I can't figure out some of the inconsistencies
     */
    { 0x3c, "<alpha>", "<alpha>" },           /* alpha, see "alpha" */
    { 0x3e, "<beta>", "\373" },               /* capital Beta, see "beta" */
    { 0x41, "<gamma>", "<gamma>" },
    { 0x42, "<Delta>", "<Delta>" },
    { 0x43, "<delta>", "<delta>" },
    { 0x44, "<kappa>", "<kappa>" },
    { 0x48, "<eta>", "<eta>" },
    { 0x49, "<phi>", "<phi>" },               /* greek phi, see "arccosecant" */
    { 0x4a, "<Eta>", "<Eta>" },
    { 0x4c, "<Iota>", "<Iota>" },
    { 0x4e, "<epsilon>", "<epsilon>" },       /* ? */
    { 0x52, "<mu>", "<mu>" },
    { 0x59, "<Omega>", "<Omega>" },
    { 0x5a, "<pi>", "<pi>" },                 /* pi, see "number" */
    { 0x5c, "<rho>", "<rho>" },               /* rho, see cardioid */
    { 0x5d, "<Sigma>", "<Sigma>" },
    { 0x5e, "<sigma>", "<sigma>" },
    { 0x6b, "<omega>", "<omega>" },
    { 0x61, "<Upsilon>", "<Upsilon>" },
    { 0x62, "<upsilon>", "<upsilon>" },
    { 0x63, "<tau>", "<tau>" },
    { 0x66, "<x shaped fricative>", "<x shaped fricative>" },   /* see vernerslaw */
    { 0x6c, "<a'>", "\326" },                 /* a with a ', see abaca */
    { 0x6d, "<a`>", "\325" },                 /* a accent grave, see "hoopla" */
    { 0x6e, "<a^>", "\327" },                 /* a with a ^, see abusimbel */
    { 0x70, "<A'>", "\202" },                 /* A with a ', see altoparan */
    { 0x71, "<A`>", "\201" },                 /* A with a ` see "akempis" */
    { 0x72, "<A^>", "\203" },                 /* A ^, see nasserlake */
    { 0x73, "<A:>", "<A:>" },                 /* A super .., see fetch */
    { 0x74, "<n~>", "\347" },                 /* n tilde, see angeleno */
    { 0x75, "<c,>", "\333" },                 /* c cedille, see acores */
    { 0x76, "<N~>", "<N~>" },                 /* N cedille, see corunna */
    { 0x77, "<C,>", "\207" },                 /* C cedille, see Catalca */
    { 0x78, "<e'>", "\335" },                 /* e accent aigu, see "authority" */
    { 0x79, "<e`>", "\334" },                 /* e accent grave, see aerometer */
    { 0x7a, "<o^>", "\356" },                 /* o circonflex, see berneralpen */
    { 0x7b, "<u'>", "\363" },                 /* u with a ', see aainel */
    { 0x7c, "<U'>", "\230" },                 /* U with a ', see aiun */
    { 0x7e, "<U^>", "\231" },                 /* U with a ^ see elmansra */
    { 0x7f, "<U:>", "<U:>" },                 /* U with two dots, see baden */
    { 0x80, "<u`>", "\362" },                 /* u with a see mohawk */
    { 0x81, "<e^>", "\336" },                 /* e circonflex, see amnemachin */
    { 0x82, "<e:>", "\337" },                 /* e two dots above, see aedes */
    { 0x83, "<E'>", "\211" },                 /* E aigu, "blue" */
    { 0x84, "<E`>", "\210" },                 /* E grave, see aiguillette */
    { 0x85, "<E^>", "\212" },                 /* E circonflex, see arris */
    { 0x86, "<E:>", "\213" },                 /* E two dots, avlona */
    { 0x87, "<u^>", "\364" },                 /* u circonflex, acoupsr */
    { 0x88, "<u:>", "<u:>" },                 /* u with two dots */
    { 0x89, "<o'>", "\355" },                 /* o with a ', abalone */
    { 0x8a, "<o`>", "\354" },                 /* o grave, see amati */
    { 0x8b, "<o^>", "\356" },                 /* o circonflex, see abientot */
    { 0x8c, "<o:>", "\360" },                 /* o two dots, see ale */
    { 0x8d, "<O'>", "\223" },                 /* O', see chatham */
    { 0x8f, "<O^>", "\224" },                 /* O^, see dinner */
    { 0x90, "<O:>", "\226" },                 /* O two dots, see austria */
    { 0x91, "<i'>", "<i'>" },                 /* i ', see acarnania */
    { 0x92, "<i`>", "<i`>" },                 /* i grave, see bokchoy */
    { 0x93, "<i^>", "\344" },                 /* i ^, see aine */
    { 0x94, "<i->", "<i->" },                 /* long i, see abbey */
    { 0x95, "<I'>", "<I'>" },                 /* I ', see cerigo */
    { 0x97, "<I^>", "\216" },                 /* I ^, see devilsisland */
    { 0x98, "<I:>", "\217" },                 /* I with two dots, see Bougie */
    { 0x9a, "<o,>", "<o,>" },                 /* o cedille see hog */
    { 0xa1, "<u^->", "<u^->" },               /* long u "house" */
    { 0xa2, "<a.>", "<a.>" },                 /* a with one . */
    { 0xa4, "<u.>", "<u.>" },                 /* u with one . */
    { 0xa5, "<nj>", "<nj>" },                 /* nj dipthong */
    { 0xa7, "<oe>", "<oe>" },                 /* "oe" ligature */
    { 0xa8, "<ue>", "<ue>" },                 /* "ue" ligature, see badenwurttemberg */
    { 0xa9, "<ue->", "<ue->" },               /* long ue , see acoupsr */
    { 0xaa, "<^y>", "<^y>" },                 /* super y, see actegratuit */
    { 0xab, "<\\e>", "<\\e>" },               /* small inverted e */
    { 0xad, "<a^u>", "<a^u>" },               /* a super 'u', see aaron */
    /* NOTE(review): second record for 0xad; the first-match scan never reaches it. */
    { 0xad, "<a^u>", "<a^u>" },               /* a super hook, see "aceldama" */
    { 0xaf, "<a^o>", "<a^o>" },               /* a superscript open circle, see "bokmal" */
    { 0xb1, "<A^o>", "<A^o>" },               /* A super o, see ahvenanmaa */
    { 0xb2, "<cv>", "<cv>" },                 /* c super 'v', see comanche */
    { 0xb3, "<e^v>", "<e^v>" },               /* e with a "v" accent, see "favor" */
    { 0xb4, "<n^v>", "<n^v>" },               /* n with a 'v', see mrianskelazne */
    { 0xb5, "<r^v>", "<r^v>" },               /* r with a v see cricetid */
    { 0xb6, "<s^v>", "<s^v>" },               /* s with a "v" accent, see "help" */
    { 0xb7, "<z^v>", "<z^v>" },               /* z super 'v', see chew */
    { 0xb8, "<C^v>", "<C^v>" },               /* C super v, see apek */
    { 0xb9, "<S^v>", "<S^v>" },               /* S super v , see koda */
    { 0xba, "<Z^v>", "<Z^v>" },               /* Z super v, see ika */
    { 0xbb, "<s,>", "<s,>" },                 /* s sub , see alipaa */
    { 0xbc, "<S,>", "<S,>" },                 /* S sub , see tami */
    { 0xbe, "<e,>", "<e,>" },                 /* e sub backwards comma.. owicim */
    { 0xc0, "<l/>", "<l/>" },                 /* ? l with a slash? see aleurone */
    { 0xc1, "<i>", "<i>" },                   /* dotless i? aydn */
    { 0xc2, "<^o>", "\353" },                 /* Degrees symbol, see "fall" */
    { 0xc5, "<o/>", "<o/>" },                 /* o with a slash, see bilberry */
    { 0xc6, "<A^->", "<A^->" },               /* long A */
    { 0xc7, "<E^->", "<E^->" },               /* long E, "echo" */
    { 0xc8, "<I->", "<I->" },                 /* long I, see aurangzeb */
    { 0xc9, "<O->", "<O->" },                 /* Long O, see obadiah */
    { 0xca, "<U->", "<U->" },                 /* long U see albucasis */
    { 0xcb, "<^n>", "<^n>" },                 /* small superscript n, see "Bechar" */
    { 0xcc, "<O/>", "<O/>" },                 /* O with a slash, see resund */
    { 0xcd, "<AE>", "<AE>" },                 /* ae ligature, see "ae" */
    { 0xce, "<a~>", "<a~>" },                 /* a ~ see aripuan */
    { 0xcf, "<ae>", "<ae>" },                 /* "ae" ligature */
    { 0xd0, "<y'>", "<y'>" },                 /* y' see comenius */
    { 0xd1, "<o^u>", "<o^u>" },               /* o with a u, see holyofholies */
    { 0xd2, "<o~>", "\225" },                 /* o~, see cames */
    { 0xd3, "<I'>", "<I'>" },                 /* I' see nonu */
    { 0xd4, "<S'>", "<S'>" },                 /* S ' see auschwitz */
    { 0xd5, "<E,>", "<E,>" },                 /* E sub backwards-comma, see auschwitz */
    { 0xd7, "<A~>", "<A~>" },                 /* A~ see prncipe */
    { 0xd9, "<oe->", "<oe->" },               /* long oe ligature, austria */
    { 0xde, "<_k_>", "<_k_>" },               /* k underscore */
    { 0xdf, "<_th_>", "<_th_>" },             /* underlined "th", "father" */
    { 0xe1, "<y:>", "\375" },                 /* y with two dots, see lous */
    { 0xe2, "<s'>", "<s'>" },                 /* s ', see owicim */
    { 0xe3, "<x'>", "<x'>" },                 /* x ', see alexander */
    { 0xe5, "<L/>", "<L/>" },                 /* L with a / through it, see migyrydz */
    { 0xe6, "<y^->", "<y^->" },               /* long y "foist" */
    { 0xe8, "<u^v>", "<u^v>" },               /* u with a v, see roof */
    { 0xea, "<n'>", "<n'>" },                 /* n ', see stanisawi */
    { 0xeb, "<z'>", "<z'>" },                 /* z ', see nebbish */
    { 0xed, "<A^u>", "<A^u>" },               /* A super u, see adar */
    { 0xf9, "<e.>", "<e.>" },                 /* e super ., see "bark" */
    { 0xfb, "<g^v>", "<g^v>" },               /* g super v, see shagreen */
    { 0xfd, "<u^u>", "<u^u>" },               /* u super u, see many */
    { 0, NULL, NULL }                         /* end of table */
};
/*
 * Special characters encoded as two bytes: the prefix 0x7F followed by
 * the byte listed here.  Both renderings are identical for every record
 * of this table.  The table is scanned in order and ends at the record
 * whose output1 is NULL, which is why a record for byte 0x00 can exist.
 */
tChars dico7Chars[] =
{
    { 0x00, "<O^~>", "<O^~>" },               /* O superscript ~, see "amazon" */
    { 0x01, "<o->", "<o->" },                 /* long o, see "abdomen" */
    { 0x06, "<i^u>", "<i^u>" },               /* i with a hook, see "beluga" */
    { 0x07, "<t.>", "<t.>" },                 /* t sub ., see "adobe" */
    { 0x08, "<T.>", "<T.>" },                 /* T sub ., see "Al" */
    { 0x0a, "<h.>", "<h.>" },                 /* H sub ., see "aceldama" */
    { 0x0b, "<H,>", "<H,>" },                 /* H sub hook, see alijz */
    { 0x13, "<n.>", "<n.>" },                 /* n super ".", see ahriman */
    { 0x15, "<sv.>", "<sv.>" },               /* s sub ., see "alcazar" in printed dict! */
    { 0x16, "<m.>", "<m.>" },                 /* m super ., see "ahimsa" */
    { 0x17, "<u->", "<u->" },                 /* long u? see dungaree */
    { 0x18, "<ae^->", "<ae^->" },             /* long ae, see "blue" */
    { 0x19, "<e~>", "<e~>" },                 /* e with a curly accent, see "schwa" */
    { 0x1a, "<a-~>", "<a-~>" },               /* a with a ~ and a -, see "bandana" */
    { 0x1b, "<_r_>", "<_r_>" },               /* underlined r, see betel */
    { 0x1d, "<h(u)>", "<h(u)>" },             /* h with a "u" under it, "basalt", NeXT wrong */
    { 0x20, "<E^u>", "<E^u>" },               /* E with a thing on it. see "amorite" */
    { 0x21, "<dv.>", "<dv.>" },               /* d sub ".", see "alcalde" (in printed) */
    { 0x22, "<rv.>", "<rv.>" },               /* r sub ".", see "andr" (printed) */
    { 0x25, "<ue->", "<ue->" },               /* long ue, see cuvier */
    { 0x29, "<Hv.>", "<Hv.>" },               /* H sub ".", "aggeus" */
    { 0x2a, "<cents>", "<cents>" },           /* cents sign, see hoof */
    { 0x2b, "<o\">", "<o\">" },               /* o with two '', see "pengo" */
    { 0x2c, "<j^v>", "<j^v>" },               /* j with a v, see file */
    { 0x2d, "<n\\.>", "<n\\.>" },             /* n sub ., see "banyan" */
    { 0x31, "<z.>", "<z.>" },                 /* z sub one ., see nizam */
    { 0x33, "<z..>", "<z..>" },               /* z sub .., see nizam */
    { 0x34, "<s^v>", "<s^v>" },               /* see hyksos */
    { 0x35, "<o)>", "<o)>" },                 /* german something, see "dobereiner" */
    { 0x37, "<o,>", "<o,>" },                 /* o sub (hook shaped thing), see "law" */
    { 0x38, "<e-~>", "<e-~>" },               /* e with a - and a ~, see oneida */
    { 0x3c, "<t..>", "<t..>" },               /* t sub .., see tabla */
    { 0x5f, "<v>", "<v>" },                   /* v form of circumflex, see circumflex */
    { 0x60, "<i-~>", "<i-~>" },               /* long i tilde , see chintz */
    { 0x61, "<edh>", "<edh>" },               /* old english letter, see edh, NeXT wrong */
    { 0x62, "<?>", "<?>" },                   /* Spanish inverted ?, see cooncan */
    { 0x63, "<r..>", "<r..>" },               /* r sub two dots, see coir */
    { 0x64, "<v>", "<v>" },                   /* v accent, see hacek */
    { 0x65, "<->", "<->" },                   /* macron mark, see macron */
    { 0x66, "<l.>", "<l.>" },                 /* l sub dot, see milo */
    { 0x67, "<|b>", "<|b>" },                 /* strange runic character see futhark */
    { 0x68, "?!", "?!" },                     /* interrobang, see interrobang */
    { 0x69, "<;>", "<;>" },                   /* Messy Egyptian char, see "hyksos" */
    { 0x6a, "==>", "==>" },                   /* "index" character (right pointing arrow), see index */
    { 0x6b, "<u-~>", "<u-~>" },               /* u with a - and a ~, see mungbean */
    { 0x6c, "<=", "<=" },                     /* less than or equal, see harmonic series */
    { 0x6d, "<i'>", "<i'>" },                 /* i with a something on it, see ephah */
    { 0x6e, "<Rx>", "<Rx>" },                 /* Rx symbol, see Rx */
    { 0x6f, "<paragraph-mark>", "<paragraph-mark>" },           /* see "paragraph" */
    { 0x70, "<_t_>", "<_t_>" },               /* underscore t, see "niter" */
    { 0x71, "<epsilon>'", "<epsilon>'" },     /* see smooth breathing */
    { 0x72, "<iota^>", "<iota^>" },           /* see smooth breathing */
    { 0x73, "<S.>", "<S.>" },                 /* some Hebrew S sub ., see zion */
    { 0x74, "<b shaped fricative>", "<b shaped fricative>" },   /* I give up, see "vernerslaw" */
    { 0x75, "<\">", "<\">" },                 /* diaeresis, see diaeresis */
    { 0x76, "<x shaped fricative>", "<x shaped fricative>" },   /* see vernerslaw */
    { 0x77, "<yogh>", "<yogh>" },             /* it's an old enlih letter, see "yogh" */
    { 0x78, "||", "||" },                     /* parallel, see parallel */
    { 0x79, "<\\/>", "<\\/>" },               /* checkmark (see check) - look at what NeXT * uses , it's not even this good */
    { 0x7a, "<e`>", "<e`>" },                 /* e grave, see gabs */
    { 0x7b, "<,>", "<,>" },                   /* cedilla accent, see cedilla */
    { 0x7d, "<intersection>", "<intersection>" },               /* inverted U, see "cap", NeXT wrong */
    { 0x7f, "<paragraph>", "<paragraph>" },   /* see "dingbat" - why are there 2 of these? */
    { 0x7e, "<iron-cross>", "<iron-cross>" }, /* well that's what it looks like, see dingbat */
    { 0x80, "<rho `>", "<rho `>" },           /* see roughbreathing */
    { 0x81, "<omega `>", "<omega `>" },       /* see roughbreathing */
    { 0x82, "#", "#" },                       /* spacemark, see "spacemark", NeXT wrong */
    { 0x84, "<zeta>", "<zeta>" },             /* see roughbreathing */
    { 0, NULL, NULL }                         /* end of table */
};
void convertToAscii(const void* data, int length,
const char* word,
NXStream** outstream, int modeSelect)
{
register const unsigned char* text = data;
tMode *currentMode = (modeSelect & 0x4)? thesModes : dicoModes;
tSection *sect = dicoSections;
int nbZap = 0;
NXStream* stream = NXOpenMemory(NULL, 0, NX_WRITEONLY);
*outstream = stream;
while (sect->name) {
if (sect->tag == *text) {
NXPrintf(stream, "%s", sect->name);
break;
}
sect++;
}
if ((modeSelect & 2) && !sect->name) NXPrintf(stream, "*** Unknown Section ***\n");
text++;
length--;
if ((modeSelect & 0x04) == 0) {
while (length && (*text != ' ')) text++, length--;
if (length) text++, length--;
}
while (length) {
tMode *mode = (modeSelect & 0x4)?thesModes:dicoModes;
tFont *font = (modeSelect & 0x4)?thesFonts:dicoFonts;
tChars *tc = dicoSingleChars;
// Skip zeroes
// if (*text == 0) goto next;
// The ~ replaces the defined word
if (*text == '~') {
NXPrintf(stream, "%s", word);
goto next;
}
// Print normal chars
if ((*text>31) && (*text<127)) {
NXPutc(stream, *text);
goto next;
}
if ((*text == 0x0A) && ((modeSelect & 0x04) == 0)) {
tChars *tcA = dicoAChars;
text++;
length--;
if (!length) break;
while (tcA->output1) {
if (tcA->c == *text) {
NXPrintf(stream, "%s", (modeSelect & 0x1)?tcA->output2:tcA->output1);
goto next;
}
tcA++;
}
nbZap++;
if (modeSelect & 2) NXPrintf(stream, "$0A$%02X", (unsigned)*text);
goto next;
}
if (*text == 0x7F) {
tChars *tc7 = dico7Chars;
text++;
length--;
if (!length) break;
while (tc7->output1) {
if (tc7->c == *text) {
NXPrintf(stream, "%s", (modeSelect & 0x1)?tc7->output2: tc7->output1);
goto next;
}
tc7++;
}
nbZap++;
if (modeSelect & 2) NXPrintf(stream, "$7F$%02X", (unsigned)*text);
goto next;
}
while (mode->name) {
if (mode->tag == *text) {
if (currentMode != mode) {
if (currentMode->aFont) outputFont(currentMode->aFont, stream, modeSelect);
if (currentMode->after) NXPrintf(stream, "%s", currentMode->after);
if (currentMode->aFont2) outputFont(currentMode->aFont, stream, modeSelect);
currentMode = mode;
if (modeSelect & 2) NXPrintf(stream, "[%s]", currentMode->name);
if (currentMode->bFont2) outputFont(currentMode->bFont, stream, modeSelect);
if (currentMode->before) NXPrintf(stream, "%s", currentMode->before);
if (currentMode->bFont) outputFont(currentMode->bFont, stream, modeSelect);
}
goto next;
}
mode++;
}
while (font->name) {
if (font->tag == *text) {
outputFont(font->tag, stream, modeSelect);
goto next;
}
font++;
}
while (tc->c) {
if (tc->c == *text) {
NXPrintf(stream, "%s", (modeSelect&0x1)?tc->output2:tc->output1);
goto next;
}
tc++;
}
nbZap++;
if (modeSelect & 2) NXPrintf(stream, "$%02X", *text);
next:
text++;
length--;
}
NXPutc(stream, '\n');
if ((modeSelect & 2) && nbZap)
NXPrintf(stream, "******************** ABOVE CONTAINS %d NON DECODED CHARS ***\n", nbZap);
NXPutc(stream, '\n');
}
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */