// MacToNeXTText.m
//
// This is MacToNeXTText.m in view mode; [Download] [Up]
/***********************************************************************\
Common class for converting Mac to NeXT text in all Convert programs
Copyright (C) 1993 David John Burrowes

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

The author, David John Burrowes, can be reached at:
	davidjohn@kira.net.netcom.com
	David John Burrowes
	1926 Ivy #10
	San Mateo, CA 94403-1367
\***********************************************************************/

/*====================================================================
This is the implementation file for the MacToNeXTText class.  Full documentation for this class can be found in the MacToNeXTText.rtf file.  I will not duplicate all that fine information here.

NOTE: You may find that text doesn't line up properly unless you use the New Century Schoolbook Roman typeface, since this was created with it.

INFORMATION:
	This is $Revision: 1.2 $ of this file
	It was last modified by $Author: death $ on $Date: 93/04/04 23:44:36 $
 	$Log:	MacToNeXTText.m,v $
Revision 1.2  93/04/04  23:44:36  death
Sun Apr  4 23:44:36 PDT 1993

Revision 1.1  93/01/10  15:08:03  death
Sun Jan 10 15:08:03 PST 1993

====================================================================*/


#import "MacToNeXTText.h"
#import <memory.h>	// for memcpy
#include <strings.h>


@implementation MacToNeXTText


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		init
//	Parameters:	none
//	Returns:		self (or nil if the superclass initializer returned nil)
//	Stores:		none
//	Description:
//		This initializes the object by filling in the convert array that it uses to
//		dictate most of its character conversions.  The array is indexed by the
//		Macintosh character code, and each entry holds the NeXT character code
//		to use in its place.  StrictIM is switched off.
//	Bugs:
//		Note that we store a null to indicate any characters that can't be
//		converted directly.  Because of this strategy, the null character must be dealt
//		with in this object, though really it is part of the superclass' territory.
//		Other more complex implementations (keep a flag with each array entry
//		to indicate if we can convert to it, or keep integers so one can store more than
//		one of 256 values in an entry) might be used to reimplement this if needed.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Instance) init
{
	Integer	counter;
	//
	//	Use whatever object the superclass initializer hands back.  If it
	//	gave up and returned nil, there is no object left to fill in.
	//
	self = [super init];
	if (self == nil)
		return nil;

	StrictIM = NO;
	//
	//	Initialize conversion array with whatever our superclass likes to do.
	//
	for (counter = 0; counter < 256; counter++)
		ConvertArray[counter] = [super	ConvertCharacter: (Character) counter];
	//
	//	We view the world as: we are just like whatever our superclass is, except
	//	that we differ for CR, the four symbol characters 0x11-0x14, and anything
	//	from 0x80 up. =)  So, modify the table to reflect all of this.  (note: indices
	//	are essentially mac character codes, while values we are assigning are NeXT
	//	character codes.  NullCharacter marks 'no single NeXT character exists').
	//
	ConvertArray[CARRIAGERETURN] = NEWLINE; // Convert mac CR's to LF's
	ConvertArray[0x11] = NullCharacter;	// (commandsymbol)
	ConvertArray[0x12] = NullCharacter;	// (check)
	ConvertArray[0x13] = NullCharacter;	// (diamond)
	ConvertArray[0x14] = NullCharacter;	// (apple)

	//	0x8X
	ConvertArray[0x80] = 0x85;			// (Adieresis)
	ConvertArray[0x81] = 0x86;			// (Aring)
	ConvertArray[0x82] = 0x87;			// (Ccedilla)
	ConvertArray[0x83] = 0x89;			// (Eacute)
	ConvertArray[0x84] = 0x91;			// (Ntilde)
	ConvertArray[0x85] = 0x96;			// (Odieresis)
	ConvertArray[0x86] = 0x9A;			// (Udieresis)
	ConvertArray[0x87] = 0xD6;			// (aacute)
	ConvertArray[0x88] = 0xD5;			// (agrave)
	ConvertArray[0x89] = 0xD7;			// (acircumflex)
	ConvertArray[0x8A] = 0xD9;			// (adieresis)
	ConvertArray[0x8B] = 0xD8;			// (atilde)
	ConvertArray[0x8C] = 0xDA;			// (aring)
	ConvertArray[0x8D] = 0xDB;			// (ccedilla)
	ConvertArray[0x8E] = 0xDD;			// (eacute)
	ConvertArray[0x8F] = 0xDC;			// (egrave)
	//	0x9X
	ConvertArray[0x90] = 0xDE;			// (ecircumflex)
	ConvertArray[0x91] = 0xDF;			// (edieresis)
	ConvertArray[0x92] = 0xE2;			// (iacute)
	ConvertArray[0x93] = 0xE0;			// (igrave)
	ConvertArray[0x94] = 0xE4;			// (icircumflex)
	ConvertArray[0x95] = 0xE5;			// (idieresis)
	ConvertArray[0x96] = 0xE7;			// (ntilde)
	ConvertArray[0x97] = 0xED;			// (oacute)
	ConvertArray[0x98] = 0xEC;			// (ograve)
	ConvertArray[0x99] = 0xEE;			// (ocircumflex)
	ConvertArray[0x9A] = 0xF0;			// (odieresis)
	ConvertArray[0x9B] = 0xEF;			// (otilde)
	ConvertArray[0x9C] = 0xF3;			// (uacute)
	ConvertArray[0x9D] = 0xF2;			// (ugrave)
	ConvertArray[0x9E] = 0xF4;			// (ucircumflex)
	ConvertArray[0x9F] = 0xF6;			// (udieresis)
	//	0xAX
	ConvertArray[0xA0] = 0xB2;			// (dagger)
	ConvertArray[0xA1] = NullCharacter;	// (degree)
	ConvertArray[0xA2] = 0xA2;			// (cent)
	ConvertArray[0xA3] = 0xA3;			// (sterling)
	ConvertArray[0xA4] = 0xA7;			// (section)
	ConvertArray[0xA5] = 0xB7;			// (bullet)
	ConvertArray[0xA6] = 0xB6;			// (paragraph)
	ConvertArray[0xA7] = 0xFB;			// (germandbls)
	ConvertArray[0xA8] = 0xB0;			// (registerserif)
	ConvertArray[0xA9] = 0xA0;			// (copyrightserif)
	ConvertArray[0xAA] = NullCharacter;	// (trademarkserif)
	ConvertArray[0xAB] = 0xC2;			// (acute)
	ConvertArray[0xAC] = 0xC8;			// (dieresis)
	ConvertArray[0xAD] = NullCharacter;	// (notequal)
	ConvertArray[0xAE] = 0xE1;			// (AE)
	ConvertArray[0xAF] = 0xE9;			// (Oslash)
	//	0xBX
	ConvertArray[0xB0] = NullCharacter;	// (infinity)
	ConvertArray[0xB1] = 0xD1;			// (plusminus)
	ConvertArray[0xB2] = NullCharacter;	// (lessequal)
	ConvertArray[0xB3] = NullCharacter;	// (greaterequal)
	ConvertArray[0xB4] = 0xA5;			// (yen)
	ConvertArray[0xB5] = 0x9D;			// (mu)
	ConvertArray[0xB6] = NullCharacter;	// (partialdiff)
	ConvertArray[0xB7] = NullCharacter;	// (summation)
	ConvertArray[0xB8] = NullCharacter;	// (product)
	ConvertArray[0xB9] = NullCharacter;	// (pi)
	ConvertArray[0xBA] = NullCharacter;	// (integral)
	ConvertArray[0xBB] = 0xE3;			// (ordfeminine)
	ConvertArray[0xBC] = 0xEB;			// (ordmasculine)
	ConvertArray[0xBD] = NullCharacter;	// (Omega)
	ConvertArray[0xBE] = 0xF1;			// (ae)
	ConvertArray[0xBF] = 0xF9;			// (oslash)
	//	0xCX
	ConvertArray[0xC0] = 0xBF;			// (questiondown)
	ConvertArray[0xC1] = 0xA1;			// (exclamdown)
	ConvertArray[0xC2] = 0xBE;			// (logicalnot)
	ConvertArray[0xC3] = NullCharacter;	// (radical)
	ConvertArray[0xC4] = 0xA6;			// (florin)
	ConvertArray[0xC5] = NullCharacter;	// (approxequal)
	ConvertArray[0xC6] = NullCharacter;	// (delta)
	ConvertArray[0xC7] = 0xAB;			// (guillemotleft)
	ConvertArray[0xC8] = 0xBB;			// (guillemotright)
	ConvertArray[0xC9] = 0xBC;			// (ellipsis)
	ConvertArray[0xCA] = 0x80;			// (nbspace)   next calls it: (figsp)
	ConvertArray[0xCB] = 0x81;			// (Agrave)
	ConvertArray[0xCC] = 0x84;			// (Atilde)
	ConvertArray[0xCD] = 0x95;			// (Otilde)
	ConvertArray[0xCE] = 0xEA;			// (OE)
	ConvertArray[0xCF] = 0xFA;			// (oe)
	//	0xDX
	ConvertArray[0xD0] = 0xB1;			// (endash)
	ConvertArray[0xD1] = 0xD0;			// (emdash)
	ConvertArray[0xD2] = 0xAA;			// (quotedblleft)
	ConvertArray[0xD3] = 0xBA;			// (quotedblright)
	ConvertArray[0xD4] = 0x60;			// (quoteleft)
	ConvertArray[0xD5] = 0x27;			// (quoteright)
	ConvertArray[0xD6] = 0x9F;			// (divide)
	ConvertArray[0xD7] = NullCharacter;	// (lozenge)
	ConvertArray[0xD8] = 0xFD;			// (ydieresis)
	ConvertArray[0xD9] = NullCharacter;	// (Ydieresis)  (usually a picture in IM fonts)
	ConvertArray[0xDA] = 0xA4;			// (fraction)
	ConvertArray[0xDB] = 0xA8;			// (currency)
	ConvertArray[0xDC] = 0xAC;			// (guilsinglleft)
	ConvertArray[0xDD] = 0xAD;			// (guilsinglright)
	ConvertArray[0xDE] = 0xAE;			// (fi)
	ConvertArray[0xDF] = 0xAF;			// (fl)
	//	0xEX
	ConvertArray[0xE0] = 0xB3;			// (daggerdbl)
	ConvertArray[0xE1] = 0xB4;			// (periodcentered)
	ConvertArray[0xE2] = 0xB8;			// (quotesinglebase)
	ConvertArray[0xE3] = 0xB9;			// (quotedblbase)
	ConvertArray[0xE4] = 0xBD;			// (perthousand)
	ConvertArray[0xE5] = 0x83;			// (Acircumflex)
	ConvertArray[0xE6] = 0x8A;			// (Ecircumflex)
	ConvertArray[0xE7] = 0x82;			// (Aacute)
	ConvertArray[0xE8] = 0x8B;			// (Edieresis)
	ConvertArray[0xE9] = 0x88;			// (Egrave)
	ConvertArray[0xEA] = 0x8D;			// (Iacute)
	ConvertArray[0xEB] = 0x8E;			// (Icircumflex)
	ConvertArray[0xEC] = 0x8F;			// (Idieresis)
	ConvertArray[0xED] = 0x8C;			// (Igrave)
	ConvertArray[0xEE] = 0x93;			// (Oacute)
	ConvertArray[0xEF] = 0x94;			// (Ocircumflex)
	//	0xFX
	ConvertArray[0xF0] = NullCharacter;	// (apple)
	ConvertArray[0xF1] = 0x92;			// (Ograve)
	ConvertArray[0xF2] = 0x98;			// (Uacute)
	ConvertArray[0xF3] = 0x99;			// (Ucircumflex)
	ConvertArray[0xF4] = 0x97;			// (Ugrave)
	ConvertArray[0xF5] = 0xF5;			// (dotlessi)
	ConvertArray[0xF6] = 0xC3;			// (circumflex)
	ConvertArray[0xF7] = 0xC4;			// (tilde)
	ConvertArray[0xF8] = 0xC5;			// (macron)
	ConvertArray[0xF9] = 0xC6;			// (breve)
	ConvertArray[0xFA] = 0xC7;			// (dotaccent)
	ConvertArray[0xFB] = 0xCA;			// (ring)
	ConvertArray[0xFC] = 0xCB;			// (cedilla)
	ConvertArray[0xFD] = 0xCD;			// (hungarumlaut)
	ConvertArray[0xFE] = 0xCE;			// (ogonek)
	ConvertArray[0xFF] = 0xCF;			// (caron)
	
	return self;
}




//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		ConvertCharacter:
//	Parameters:	a (Macintosh) character to be converted
//	Returns:		the converted (NeXT) character, or the character itself when it
//				was passed through unchanged
//	Stores:		the character that we are returning (as the first result), plus an
//				error code and message describing how the conversion went.
//	Description:
//		This looks the character up in the ConvertArray that was set up during
//		initialization.  Two situations hand the character back untouched:
//			- strict 'Inside Mac vol 1. p. 221' mode is on and the character is
//			  above 0xD8 (undefined in that table).  This counts as a success.
//			- the table holds a null for a non-null character, meaning there is no
//			  single NeXT equivalent.  This is reported as errCANTMAPTOONE.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Character) ConvertCharacter: (Character) theCharacter
{
	Character	converted = theCharacter;	// pass-through unless the table says otherwise
	Character	lookedUp;
	Boolean		mapped = YES;

	[self ResetResults];
	//
	//	Only consult the table when we are not in the strict-mode pass-through
	//	range (characters above 0xD8 while StrictIM is on).
	//
	if ((theCharacter <= 0xD8) || (StrictIM != YES))
	{
		lookedUp = ConvertArray[theCharacter];
		//
		//	A null entry is only a real answer when we were handed a null.
		//	For anything else it means 'no single-character equivalent', and
		//	we leave the pass-through value in place and flag the failure.
		//
		if ((lookedUp != NullCharacter) || (theCharacter == NullCharacter))
			converted = lookedUp;
		else
			mapped = NO;
	}
	//
	//	Record the character and the outcome, then hand the character back.
	//
	[self	PutCharacter: converted Into: FIRST_RESULT];
	if (mapped == NO)
		[self	StoreErrorCode: errCANTMAPTOONE AndText: "No equivalent standard NeXTSTEP character"];
	else
		[self	StoreErrorCode: errOK AndText: "Character converted!"];
	return converted;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		ConvertString:WithLength:
//	Parameters:	a pointer to a string of data to be converted
//				the length of the data the pointer points to.
//	Returns:		a pointer to a new block holding the converted data
//	Stores:		the pointer we are returning
//				the length of the new data (not counting the terminating null)
//	Description:
//		This converts the text in the source data area into a new area.
//		This is passed a string of characters.  This then creates a new string,
//		and converts all the source characters from a Macintosh encoding into
//		the destination string.  This differs from ConvertCharacter:, above, in only
//		two ways:  The first is that this processes multiple characters at once.
//		The second is that if it finds a character that doesn't map to a specific
//		alternate character, it will replace it with a multi character string.  E.g.
//		less than or equal to is replaced with <=  And the apple symbol is just
//		replaced with [apple].  The source string is not altered in any way.  The
//		caller is responsible for disposing of the returned string.
//		A null source pointer or a negative length is treated as an empty string,
//		so the caller gets back a block holding just the terminating null.
//	Bugs:
//		We don't really check for errors anywhere.  In particular the result of
//		NewPointer is used unchecked (NOTE(review): confirm whether NewPointer
//		can return a null pointer, and which error code should be stored if so).
//	History
//		92.12.31	djb	Added a null-character termination to the string.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Pointer) ConvertString: (Pointer) theData WithLength: (Integer) length
{
	Character*	source = (Character*) theData; // getting a better typed ptr.
	Integer		index,  destindex = 0;
	Integer		destsize, newsize;
	Integer		chunklength;
	CString		replacement;
	Character*	temp, *dest;
	Character	result;
	[self	ResetResults];
	//
	//	Nothing sensible can be read from a null pointer or a negative count, so
	//	treat both as 'no input'.  (A negative length used to end up as the byte
	//	count of a memcpy further down.)
	//
	if ((source == 0) || (length < 0))
		length = 0;
	//
	//	Start with one and a half times the source size, plus one byte that is
	//	always kept in reserve for the terminating null.
	//
	destsize = length + (length / 2) + 1;
	dest = (Character*) NewPointer(destsize);
	//
	for (index = 0; index < length; index++)
	{
		//
		//	Determine if we are doing 'strict inside mac vol 1 p. 221' conversions.
		//	if so, and the character is undefined in IM, keep the character literally.
		//	Otherwise, just get whatever the standard conversion is.
		//
		if ((source[index] > 0xD8)&&(StrictIM == YES))
			result = source[index];
		else
			result = ConvertArray[source[index]];
		//
		//	If our result is not a null (or if we passed a null), the result itself
		//	is what goes into the destination: one character, no replacement string.
		//
		if ((result != NullCharacter) ||  (source[index] == NullCharacter))
		{
			replacement = (CString) 0;
			chunklength = 1;
		}
		else
		{
			//
			//	We evidently got a null back, telling us 'yo! I can't convert this to
			//	a different single character'.  So, now we determine which it was, and
			//	determine what string to use in its place.  Generally, we use the PS
			//	name for the character in []'s.
			//
			switch (source[index])
			{
				case 0x11:			// (commandsymbol)
					replacement = "[commandsymbol]";
					break;
				case 0x12:			// (check)
					replacement = "[check]";
					break;
				case 0x13:			// (diamond)
					replacement = "[diamond]";
					break;
				case 0x14:			// (apple)
					replacement = "[apple]";
					break;
				case 0xA1:			// (degree)
					replacement = "[degrees]";
					break;
				case 0xAA:			// (trademarkserif)
					replacement = "[trademarkserif]";
					break;
				case 0xAD:			// (notequal)
					replacement = "<>";
					break;
				case 0xB0:			// (infinity)
					replacement = "[infinity]";
					break;
				case 0xB2:			// (lessequal)
					replacement = "<=";
					break;
				case 0xB3:			// (greaterequal)
					replacement = ">=";
					break;
				case 0xB6:			// (partialdiff)
					replacement = "[partialdiff]";
					break;
				case 0xB7:			// (summation)
					replacement = "[summation]";
					break;
				case 0xB8:			// (product)
					replacement = "[product]";
					break;
				case 0xB9:			// (pi)
					replacement = "[pi]";
					break;
				case 0xBA:			// (integral)
					replacement = "[integral]";
					break;
				case 0xBD:			// (Omega)
					replacement = "[Omega]";
					break;
				case 0xC3:			// (radical)
					replacement = "[radical]";
					break;
				case 0xC5:			// (approxequal)
					replacement = "[approxequal]";
					break;
				case 0xC6:			// (delta)
					replacement = "[delta]";
					break;
				case 0xD7:			// (lozenge)
					replacement = "[lozenge]";
					break;
				case 0xD9:			// (Ydieresis)  (usually a picture in IM fonts)
					if (StrictIM == NO)
						replacement = "[Ydieresis(or picture)]";
					else
						replacement = "\331"; // the character itself.
					break;
				case 0xF0:			// (apple)
					if (StrictIM == NO)
						replacement = "[apple]";
					else
						replacement = "\360"; // the character itself.
					break;
				default:
					replacement = "[[THERE IS A BUG IN THE PROIGRAM (no joke)]]";
					break;
			}
			chunklength = strlen(replacement);
		}
		//
		//	Make sure the output block can take this chunk and still have the
		//	reserved byte for the terminating null.  Every replacement string is far
		//	shorter than 512 bytes, but loop anyway so the reservation holds no
		//	matter what gets added to the switch above.  Only the destindex bytes
		//	written so far need to be carried over to the new block.
		//
		if ((destindex + chunklength + 1) > destsize)
		{
			newsize = destsize;
			while ((destindex + chunklength + 1) > newsize)
				newsize += 512;
			temp = (Character*) NewPointer(newsize);
			memcpy(temp, dest, destindex);
			FreePointer(dest);
			dest = temp;
			destsize = newsize;
		}
		//
		//	Append either the single converted character or the replacement string.
		//
		if (replacement == (CString) 0)
		{
			dest[destindex] = result;
			destindex++;
		}
		else
		{
			memcpy(&dest[destindex], replacement, chunklength);
			destindex += chunklength;
		}
	}
	//
	//	The block of memory returned is also null terminated.  Room for this byte
	//	was reserved above, so no further growth is needed here.
	//
	dest[destindex] = EndOfCString;


	//
	//	Store the result, and return.  (note that destindex always ends up pointing to
	//	the next byte to be used, and thus is also a count of the total number of bytes
	//	in the dest string, not counting the terminating null).
	//
	[self	StorePointer: dest];
	[self	PutPositiveInteger: destindex  Into: SECOND_RESULT];
	[self	StoreErrorCode: errOK AndText: "Nothing could go wrong (pathetic program)!"];
	return dest;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		UseIM1:
//	Parameters:	a boolean value
//	Returns:		self
//	Stores:		none
//	Description:
//		This simply allows the user to choose whether they want to do strict
//		'Inside Mac vol 1' conversions.  Any true (non-zero) value turns strict
//		mode on.  The flag is stored as exactly YES or NO, because the conversion
//		methods test it with == YES and == NO.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- UseIM1:  (Boolean) doItStrictly
{
	//
	//	Normalize, so that a 'true' value other than YES still enables strict mode.
	//
	StrictIM = doItStrictly ? YES : NO;
	return self;
}
@end
// These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.