NeXTToMacText.m

This is NeXTToMacText.m in view mode; [Download] [Up]
/***********************************************************************\
Common class for converting NeXT to Mac text in all Convert programs
Copyright (C) 1993 David John Burrowes

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

The author, David John Burrowes, can be reached at:
	davidjohn@kira.net.netcom.com
	David John Burrowes
	1926 Ivy #10
	San Mateo, CA 94403-1367
\***********************************************************************/

/*====================================================================
This is the implementation file for the NeXTToMacText class.  Full documentation for this class can be found in the NeXTToMacText.rtf file.  I will not duplicate all that fine information here.

NOTE: You may find that text doesn't line up properly unless you use the New Century Schoolbook Roman typeface, since this was created with it.

INFORMATION:
	This is $Revision: 1.2 $ of this file
	It was last modified by $Author: death $ on $Date: 93/04/04 23:44:44 $
 	$Log:	NeXTToMacText.m,v $
Revision 1.2  93/04/04  23:44:44  death
Sun Apr  4 23:44:44 PDT 1993

Revision 1.1  93/01/10  15:08:29  death
Sun Jan 10 15:08:29 PST 1993

====================================================================*/


#import "NeXTToMacText.h"
#import <memory.h>	// for memcpy
#include <strings.h>


@implementation NeXTToMacText


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		init:
//	Parameters:	none
//	Returns:		self
//	Stores:		none
//	Description:
//		This initializes the object by filling in the convert array that it uses to
//		dictate most of its character conversions.
//	Bugs:
//		Note that we store a null to indicate any characters that can't be
//		converted directly.  Because of this strategy, the null character must be dealt
//		with in this object, though really it is part of the superclass' territory.
//		Other, more complex implementations (keep a flag with each array entry
//		to indicate if we can convert to it, or keep integers so one can store more than
//		one of 256 values in an entry) might be used to reimplement this if needed.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Instance) init
{
	//
	//	Builds ConvertArray, the 256-entry lookup table that ConvertCharacter: and
	//	ConvertString:WithLength: index with the incoming (NeXT) character code to
	//	obtain the outgoing (Macintosh) character code.  Returns self.
	//
	Integer	counter;
	//
	//	NOTE(review): the value returned by [super init] is not used; this assumes
	//	the superclass always hands back self -- confirm.
	//
	[super init];
	
	//	Curly-quote substitution is off until ConvertSingleQuotes: turns it on.
	UseCurlyQuotes = NO;
	//
	//	Initialize conversion array with whatever our superclass likes to do.
	//	This must happen before the overrides below, which replace some entries.
	//
	for (counter = 0; counter < 256; counter++)
		ConvertArray[counter] = [super 	ConvertCharacter: (Character) counter];
	//
	//	We view the world as: we are just like whatever our superclass is, except
	//	that we differ for NL and for every code from 0x80 upward.  So, modify the
	//	table to reflect all of this.  (Note: indices are essentially NeXT character
	//	codes, while the values we are assigning are Mac character codes.  The name
	//	in each trailing comment is the glyph found at that NeXT code.)
	//
	//	An entry of NullCharacter marks a NeXT character that has no single
	//	Macintosh equivalent; the conversion methods test for it explicitly.
	//
	ConvertArray[NEWLINE] = CARRIAGERETURN; // Convert line endings
	//	0x8X
	ConvertArray[0x80] = 0xCA;		// (nbspace)
	ConvertArray[0x81] = 0xCB;		// (Agrave)
	ConvertArray[0x82] = 0xE7;		// (Aacute)
	ConvertArray[0x83] = 0xE5;		// (Acircumflex)
	ConvertArray[0x84] = 0xCC;		// (Atilde)
	ConvertArray[0x85] = 0x80;		// (Adieresis)
	ConvertArray[0x86] = 0x81;		// (Aring)
	ConvertArray[0x87] = 0x82;		// (Ccedilla)
	ConvertArray[0x88] = 0xE9;		// (Egrave)
	ConvertArray[0x89] = 0x83;		// (Eacute)
	ConvertArray[0x8A] = 0xE6;		// (Ecircumflex)
	ConvertArray[0x8B] = 0xE8;		// (Edieresis)
	ConvertArray[0x8C] = 0xED;		// (Igrave)
	ConvertArray[0x8D] = 0xEA;		// (Iacute)
	ConvertArray[0x8E] = 0xEB;		// (Icircumflex)
	ConvertArray[0x8F] = 0xEC;		// (Idieresis)
	//	0x9X
	ConvertArray[0x90] = NullCharacter;	// (Eth)
	ConvertArray[0x91] = 0x84;		// (Ntilde)
	ConvertArray[0x92] = 0xF1;		// (Ograve)
	ConvertArray[0x93] = 0xEE;		// (Oacute)
	ConvertArray[0x94] = 0xEF;		// (Ocircumflex)
	ConvertArray[0x95] = 0xCD;		// (Otilde)
	ConvertArray[0x96] = 0x85;		// (Odieresis)
	ConvertArray[0x97] = 0xF4;		// (Ugrave)
	ConvertArray[0x98] = 0xF2;		// (Uacute)
	ConvertArray[0x99] = 0xF3;		// (Ucircumflex)
	ConvertArray[0x9A] = 0x86;		// (Udieresis)
	ConvertArray[0x9B] = NullCharacter;	// (Yacute)
	ConvertArray[0x9C] = NullCharacter;	// (Thorn)
	ConvertArray[0x9D] = 0xB5;		// (mu)
	ConvertArray[0x9E] = NullCharacter;			// (multiply)
	ConvertArray[0x9F] = 0xD6;		// (divide)
	//	0xAX
	ConvertArray[0xA0] = 0xA9;		// (copyrightserif)
	ConvertArray[0xA1] = 0xC1;		// (exclamdown)
	ConvertArray[0xA2] = 0xA2;		// (cent)
	ConvertArray[0xA3] = 0xA3;		// (sterling)
	ConvertArray[0xA4] = 0xDA;		// (fraction)
	ConvertArray[0xA5] = 0xB4;		// (yen)
	ConvertArray[0xA6] = 0xC4;		// (florin)
	ConvertArray[0xA7] = 0xA4;		// (section)
	ConvertArray[0xA8] = 0xDB;		// (currency)
	ConvertArray[0xA9] = 0x27;		// (quotesingle)
	ConvertArray[0xAA] = 0xD2;		// (quotedblleft)
	ConvertArray[0xAB] = 0xC7;		// (guillemotleft)
	ConvertArray[0xAC] = 0xDC;		// (guilsinglleft)
	ConvertArray[0xAD] = 0xDD;		// (guilsinglright)
	ConvertArray[0xAE] = 0xDE;		// (fi)
	ConvertArray[0xAF] = 0xDF;		// (fl)
	//	0xBX
	ConvertArray[0xB0] = 0xA8;		// (registerserif)
	ConvertArray[0xB1] = 0xD0;		// (endash)
	ConvertArray[0xB2] = 0xA0;		// (dagger)
	ConvertArray[0xB3] = 0xE0;		// (daggerdbl)
	ConvertArray[0xB4] = 0xE1;		// (periodcentered)
	ConvertArray[0xB5] = NullCharacter;	// (brokenbar)
	ConvertArray[0xB6] = 0xA6;		// (paragraph)
	ConvertArray[0xB7] = 0xA5;		// (bullet)
	ConvertArray[0xB8] = 0xE2;		// (quotesinglbase)
	ConvertArray[0xB9] = 0xE3;		// (quotedblbase)
	ConvertArray[0xBA] = 0xD3;		// (quotedblright)
	ConvertArray[0xBB] = 0xC8;		// (guillemotright)
	ConvertArray[0xBC] = 0xC9;		// (ellipsis)
	ConvertArray[0xBD] = 0xE4;		// (perthousand)
	ConvertArray[0xBE] = 0xC2;		// (logicalnot)
	ConvertArray[0xBF] = 0xC0;		// (questiondown)
	//	0xCX
	ConvertArray[0xC0] = NullCharacter;			// (onesuperior)
	ConvertArray[0xC1] = 0x60;		// (grave)
	ConvertArray[0xC2] = 0xAB;		// (acute)
	ConvertArray[0xC3] = 0xF6;		// (circumflex)
	ConvertArray[0xC4] = 0xF7;		// (tilde)
	ConvertArray[0xC5] = 0xF8;		// (macron)
	ConvertArray[0xC6] = 0xF9;		// (breve)
	ConvertArray[0xC7] = 0xFA;		// (dotaccent)
	ConvertArray[0xC8] = 0xAC;		// (dieresis)
	ConvertArray[0xC9] = NullCharacter;	// (twosuperior)
	ConvertArray[0xCA] = 0xFB;		// (ring)
	ConvertArray[0xCB] = 0xFC;		// (cedilla)
	ConvertArray[0xCC] = NullCharacter;	// (threesuperior)
	ConvertArray[0xCD] = 0xFD;		// (hungarumlaut)
	ConvertArray[0xCE] = 0xFE;		// (ogonek)
	ConvertArray[0xCF] = 0xFF;		// (caron)
	//	0xDX
	ConvertArray[0xD0] = 0xD1;		// (emdash)
	ConvertArray[0xD1] = 0xB1;		// (plusminus)
	ConvertArray[0xD2] = NullCharacter;	// (onequarter)
	ConvertArray[0xD3] = NullCharacter;	// (onehalf)
	ConvertArray[0xD4] = NullCharacter;	// (threequarters)
	ConvertArray[0xD5] = 0x88;		// (agrave)
	ConvertArray[0xD6] = 0x87;		// (aacute)
	ConvertArray[0xD7] = 0x89;		// (acircumflex)
	ConvertArray[0xD8] = 0x8B;		// (atilde)
	ConvertArray[0xD9] = 0x8A;		// (adieresis)
	ConvertArray[0xDA] = 0x8C;		// (aring)
	ConvertArray[0xDB] = 0x8D;		// (ccedilla)
	ConvertArray[0xDC] = 0x8F;		// (egrave)
	ConvertArray[0xDD] = 0x8E;		// (eacute)
	ConvertArray[0xDE] = 0x90;		// (ecircumflex)
	ConvertArray[0xDF] = 0x91;		// (edieresis)
	//	0xEX
	ConvertArray[0xE0] = 0x93;		// (igrave)
	ConvertArray[0xE1] = 0xAE;		// (AE)
	ConvertArray[0xE2] = 0x92;		// (iacute)
	ConvertArray[0xE3] = 0xBB;		// (ordfeminine)
	ConvertArray[0xE4] = 0x94;		// (icircumflex)
	ConvertArray[0xE5] = 0x95;		// (idieresis)
	ConvertArray[0xE6] = NullCharacter;	// (eth)
	ConvertArray[0xE7] = 0x96;		// (ntilde)
	ConvertArray[0xE8] = NullCharacter;	// (Lslash)
	ConvertArray[0xE9] = 0xAF;		// (Oslash)
	ConvertArray[0xEA] = 0xCE;		// (OE)
	ConvertArray[0xEB] = 0xBC;		// (ordmasculine)
	ConvertArray[0xEC] = 0x98;		// (ograve)
	ConvertArray[0xED] = 0x97;		// (oacute)
	ConvertArray[0xEE] = 0x99;		// (ocircumflex)
	ConvertArray[0xEF] = 0x9B;		// (otilde)
	//	0xFX
	ConvertArray[0xF0] = 0x9A;		// (odieresis)
	ConvertArray[0xF1] = 0xBE;		// (ae)
	ConvertArray[0xF2] = 0x9D;		// (ugrave)
	ConvertArray[0xF3] = 0x9C;		// (uacute)
	ConvertArray[0xF4] = 0x9E;		// (ucircumflex)
	ConvertArray[0xF5] = 0xF5;		// (dotlessi)
	ConvertArray[0xF6] = 0x9F;		// (udieresis)
	ConvertArray[0xF7] = NullCharacter;	// (yacute)
	ConvertArray[0xF8] = NullCharacter;	// (lslash)
	ConvertArray[0xF9] = 0xBF;		// (oslash)
	ConvertArray[0xFA] = 0xCF;		// (oe)
	ConvertArray[0xFB] = 0xA7;		// (germandbls)
	ConvertArray[0xFC] = NullCharacter;	// (thorn)
	ConvertArray[0xFD] = 0xD8;		// (ydieresis)
	ConvertArray[0xFE] = NullCharacter;	// (not assigned)
	ConvertArray[0xFF] = NullCharacter;	// (ascii control char?)
	return self;
}




//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		ConvertCharacter:
//	Parameters:	a character to be converted
//	Returns:		the converted character
//	Stores:		the character that we are returning.
//	Description:
//		This uses the ConvertArray set up in the initialization to convert
//		a character from the NeXT character set to a Mac char.  It allows for converting
//		single quotes to either Mac style (grave accent and single neuter quote) or to
//		curly Mac quotes (not in the lower ASCII range).  It returns the converted
//		character and stores a result code based on its success.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Character) ConvertCharacter: (Character) theCharacter
{
	//
	//	Converts one NeXT character to its Macintosh equivalent.
	//	Parameter:	theCharacter, the NeXT character code to convert.
	//	Returns:	the Mac character code, or theCharacter itself, unchanged, when
	//			the table holds no single-character equivalent.
	//	Stores:		the returned character in FIRST_RESULT, plus errOK or
	//			errCANTMAPTOONE and a matching message.
	//
	Character	result;
	Boolean		couldconvert = YES;

	[self ResetResults];
	//
	//	Look up our result in the conversion array.
	//
	result = ConvertArray[theCharacter];
	//
	//	If we've been asked to convert ` and ' to nice Mac curly quotes,
	//	rather than to ` and ', then see if we got a quote, and change the result
	//	appropriately.  The flag is tested for truth rather than compared with
	//	YES, so that any nonzero value stored in it enables the substitution.
	//
	if (UseCurlyQuotes)
	{
		if (theCharacter == 0x27)
			result = 0xD5;		// (quoteright)
		else if (theCharacter == 0x60)
			result = 0xD4;		// (quoteleft)
	}
	//
	//	A null in the table means "no single Mac character exists", unless the
	//	caller really did hand us a null.  In the former case pass the source
	//	character through untouched and remember that we could not convert it.
	//
	if ((result == NullCharacter) && (theCharacter != NullCharacter))
	{
		result = theCharacter;
		couldconvert = NO;
	}
	//
	//	Store the result, and return.
	//
	[self	PutCharacter: result Into: FIRST_RESULT];
	if (couldconvert)
		[self	StoreErrorCode: errOK AndText: "Character converted!"];
	else
		[self	StoreErrorCode: errCANTMAPTOONE AndText: "No equivalent standard Macintosh character"];
	return result;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		ConvertString:WithLength:
//	Parameters:	a pointer to a string of data to be converted
//				the length of the data the pointer points to.
//	Returns:		a pointer to a new block holding the converted data
//	Stores:		the pointer we are returning
//				the length of the new data
//	Description:
//		This converts the text in the source data area into a new area.
//		This is passed a string of characters.  This then creates a new string,
//		and converts all the source characters from the NeXT encoding into
//		the destination string.  This differs from ConvertCharacter:, above, in only
//		two ways:  The first is that this processes multiple characters at once.
//		The second is that if it finds a character that doesn't map to a specific
//		alternate character, it will replace it with a multi-character string.  E.g.
//		the Eth character is replaced with [Eth], and the one-half fraction is
//		replaced with [onehalf].  The source string is not altered in any way.  The
//		caller is responsible for disposing of the returned string.
//	Bugs:
//		We don't really check for errors anywhere (not that there are many opportunities)...
//	History
//		92.12.31	djb	Added a null-character termination to the string.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
//	Private helper for ConvertString:WithLength:.  Returns the bracketed name that
//	stands in for a NeXT character whose ConvertArray entry is NullCharacter,
//	i.e. one with no single-character Macintosh equivalent.
//
static CString NeXTToMacReplacementFor(Character theCharacter)
{
	switch (theCharacter)
	{
		case 0x90:	return "[Eth]";
		case 0x9B:	return "[Yacute]";
		case 0x9C:	return "[Thorn]";
		case 0x9E:	return "[multiply]";
		case 0xB5:	return "[brokenbar]";
		case 0xC0:	return "[onesuperior]";
		case 0xC9:	return "[twosuperior]";
		case 0xCC:	return "[threesuperior]";
		case 0xD2:	return "[onequarter]";
		case 0xD3:	return "[onehalf]";
		case 0xD4:	return "[threequarters]";
		case 0xE6:	return "[eth]";
		case 0xE8:	return "[Lslash]";
		case 0xF7:	return "[yacute]";
		case 0xF8:	return "[lslash]";
		case 0xFC:	return "[thorn]";
		case 0xFE:	return "[not defined]";
		case 0xFF:	return "[ascii control char]";
		default:
			//	The table held a null for a character this list does not know.
			return "[[THERE IS A BUG IN THE PROIGRAM (no joke)]]";
	}
}


//
//	Private helper for ConvertString:WithLength:.  Makes sure the block at *buffer
//	can hold at least `needed` bytes.  When it cannot, a larger block is obtained
//	(grown in 512 byte steps), the first `used` bytes are copied across, the old
//	block is freed, and *buffer / *capacity are updated.
//	NOTE(review): the result of NewPointer is not checked, exactly as in the code
//	this replaces -- confirm how NewPointer reports failure.
//
static void NeXTToMacEnsureRoom(Character** buffer, PositiveInteger* capacity,
				PositiveInteger used, PositiveInteger needed)
{
	Character*		bigger;
	PositiveInteger	newcapacity = *capacity;

	if (needed <= newcapacity)
		return;
	while (newcapacity < needed)
		newcapacity += 512;
	bigger = (Character*) NewPointer(newcapacity);
	memcpy(bigger, *buffer, used);
	FreePointer(*buffer);
	*buffer = bigger;
	*capacity = newcapacity;
}


//
//	Converts `length` bytes of NeXT text at theData into a newly allocated,
//	null terminated block of Macintosh text.
//	Parameters:	theData, the source bytes (never modified)
//			length, the number of source bytes; a negative value is treated as 0
//	Returns:	the new block; the caller is responsible for disposing of it.
//	Stores:		the returned pointer, the number of converted bytes (not counting
//			the terminator) in SECOND_RESULT, and errOK.
//
- (Pointer) ConvertString: (Pointer) theData WithLength: (Integer) length
{
	Character*		source = (Character*) theData; // getting a better typed ptr.
	Character*		dest;
	Character		result;
	CString			replacement;
	Integer			index;
	PositiveInteger	destindex = 0;
	PositiveInteger	destsize;
	PositiveInteger	replacelength;

	[self	ResetResults];
	//
	//	A negative length used to yield a negative buffer size, which the later
	//	signed/unsigned comparisons treated as enormous, so the terminator was
	//	written without any room for it.  Treat it as an empty string.
	//
	if (length < 0)
		length = 0;
	//
	//	Start with half as much again as the source (integer arithmetic), plus
	//	one byte so that even an empty result has room for its terminator.
	//
	destsize = (PositiveInteger) length + ((PositiveInteger) length / 2) + 1;
	dest = (Character*) NewPointer(destsize);

	for (index = 0; index < length; index++)
	{
		//
		//	Look up our result in the conversion array.
		//
		result = ConvertArray[source[index]];
		//
		//	If we've been asked to convert ` and ' to nice Mac curly quotes,
		//	then see if we got a quote, and change the result appropriately.
		//	The flag is tested for truth so any nonzero value enables this.
		//
		if (UseCurlyQuotes)
		{
			if (source[index] == 0x27)
				result = 0xD5;		// (quoteright)
			else if (source[index] == 0x60)
				result = 0xD4;		// (quoteleft)
		}
		if ((result != NullCharacter) || (source[index] == NullCharacter))
		{
			//
			//	A real result (or a genuine null in the source): store one byte.
			//
			NeXTToMacEnsureRoom(&dest, &destsize, destindex, destindex + 1);
			dest[destindex] = result;
			destindex++;
		}
		else
		{
			//
			//	We got a null back, telling us this can't be converted to a
			//	single character.  Use its bracketed name in its place.
			//
			replacement = NeXTToMacReplacementFor(source[index]);
			replacelength = strlen(replacement);
			NeXTToMacEnsureRoom(&dest, &destsize, destindex, destindex + replacelength);
			memcpy(&dest[destindex], replacement, replacelength);
			destindex += replacelength;
		}
	}
	//
	//	Null terminate the block.  The terminator is not counted in the length.
	//
	NeXTToMacEnsureRoom(&dest, &destsize, destindex, destindex + 1);
	dest[destindex] = EndOfCString;
	//
	//	Store the result, and return.  (destindex always ends up pointing to the
	//	next byte to be used, and thus is also the count of bytes in dest.)
	//
	[self	StorePointer: dest];
	[self	PutPositiveInteger: destindex Into: SECOND_RESULT];
	[self	StoreErrorCode: errOK AndText: "Nothing could go wrong (pathetic program)!"];
	return dest;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		ConvertSingleQuotes:
//	Parameters:	a boolean value
//	Returns:		self
//	Stores:		none
//	Description:
//		This simply allows the user to choose whether they want to convert single
//		quotes to pretty curly quotes on the Mac or not.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- ConvertSingleQuotes:  (Boolean) useCurlyQuotes
{
	//
	//	Turns curly-quote substitution on (any nonzero value) or off (zero).
	//	The flag is stored as exactly YES or NO, so that a caller passing some
	//	other nonzero value still enables the feature wherever the flag is
	//	compared against YES.  Returns self.
	//
	UseCurlyQuotes = (useCurlyQuotes ? YES : NO);
	return self;
}
@end
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.