eText.TaggedText.m

This is eText.TaggedText.m in view mode; [Download] [Up]
{\rtf0\ansi{\fonttbl\f0\fmodern Courier;\f1\ftech Symbol;\f2\fmodern Ohlfs;}
\paperw11640
\paperh8400
\margl40
\margr40
{\colortbl;\red0\green0\blue0;}
\pard\tx520\tx1060\tx1600\tx2120\tx2660\tx3200\tx3720\tx4260\tx4800\tx5320\f0\b0\i0\ulnone\fs24\fc0\cf0 //-----------------------------------------------------------------------------\
//
\i 	
\b FILENAME
\b0 :	
\b\i0 eText.TaggedText.m\

\b0 //
\i 	
\b SUMMARY
\b0 :	
\b\i0 Implementation of Tagged markup formats of eText (HTML, LaTeX)
\b0 \
//	
\b\i CATEGORY
\b0 :
\i0 	
\b TaggedText
\b0 \
//	
\b\i PROTOCOLS
\b0 :
\i0 	
\b NXRegisterPrintfProc()
\b0 \
//	
\b\i INTERFACE
\b0 :
\i0 	
\b See ChooseEncoding.Tool
\b0 \
//	
\b\i AUTHOR
\b0 :		
\b\i0 Rohit Khare and Tom Zavisca
\b0 \
//	
\b\i COPYRIGHT
\b0 :	
\f1\i0 �
\f0\b 1993,94 California Institute of Technology, eText Project\

\b0 //-----------------------------------------------------------------------------\
//	
\b\i Implementation Comments\

\b0\i0 //		There's a lot of malarkey involved with the API for the encoders and\
//	the printfProc registrations. All the "user" needs is 
\b + flushHTMLEncoding
\b0 .\
//\
//		
\b currentHTMLEncoding
\b0  is a file-global 
\b char**
\b0  of 
\b ENTITIES
\b0  entries\
//		
\b currentHTMLEncodingLength
\b0  is a file-global unsigned 
\b char
\b0 [
\b ENTITIES
\b0 ];\
//		
\b defaultHTMLEncoding
\b0  is a 
\b char*
\b0 [
\b ENTITIES
\b0 ] C array.\
//		An attempt is made to read encodings from the file specified in\
//	a user dwrite.\
//\
//		Something that bothers me about encoders: how can we properly use\
//	Symbol font?  
\f1 S � 
\f0 \\sigma ?\
//\
//		After investigating the interactions of Annotations and Tagging, \
//	I have decided that it is not necessary to return to ground state\
//	before writing HTML for an annotation. The physical and logical tags\
//	apply to the run containing the annotation as well.\
//		Reason: link button on same line as an H3 font descrip.\
//		Output: close, linkbutton, open, desc, close splits into two lines.\
//-----------------------------------------------------------------------------\
//	
\b\i History
\b0\i0 \
//	11/04/94:	
\b DESIGN REV: HTML annotations don't start from ground state
\b0 \
//	10/17/94:	
\b Cleaned up for eText5.
\b0 \
//	08/05/94:	
\b Completely Rearchitected for 5.0. RK
\b0 \
//-----------------------------------------------------------------------------\
//	
\b Imported Interfaces
\b0 \
//\
	#import "
\b eText.TaggedText.h
\b0 "\
	#import <
\b ctype.h
\b0 >\
\

\i @implementation eText(TaggedText)\

\i0 //-----------------------------------------------------------------------------\
//	
\b Stream Operators
\b0 \
//\
//	Write the receiver's text to s as HTML and return self.\
//	For every run that carries no annotation (curr->info == NULL) the tag pair is\
//	the first entry of tags whose font equals the run's font (tags may be NULL;\
//	the list ends at an entry whose font is nil). If none matches, a pair is\
//	synthesized from the font's fixed-pitch/underline/bold/italic state.\
//	Runs that carry an annotation are asked to write themselves (if they respond\
//	to writeHTML:forView:) and their own characters are skipped.\
- writeHTML:(NXStream *) s withTags:(taggingInfo *) tags \{\
	int				k, N;\
	NXRun			*curr;\
	NXTextBlock		*currBlock;\
	int				currentOffset, targetOffset, span;\
	NXAtom			closer;			// closing string of the currently open style, or NULL\
	taggingInfo		*aTag, *found;\
	taggingInfo		fakeTag;		// holds a synthesized physical-style pair\
	id				fm;				// FontManager\
	\
	N = theRuns->chunk.used/sizeof(NXRun);\
	curr = theRuns->runs;\
	currBlock = [self firstTextBlock];\
	currentOffset = 0;\
	closer = NULL;\
	fm = [FontManager new];\
	\
	for (k=0; k < N; k++) \{\
		if (curr->info == NULL) \{\
			// Encode the state for this run.\
			// First, is this a tagged run? If so, do we need to close the\
			// previous state?\
			aTag = tags; found = NULL;\
			while (!found && aTag && aTag->font) \{\
				if (curr->font == aTag->font) found = aTag;\
				aTag++;\
			\}\
			\
			if (!found) \{				// search for physical tags\
				char tmp[32],*family;\
				NXFontTraitMask traits;\
				int weight;\
				float size;\
				BOOL isFixedPitch;\
				\
				*tmp = 0;\
				isFixedPitch = 		// primitive monospacing test\
					([curr->font metrics])->isFixedPitch;\
				[fm getFamily:&family traits:&traits weight:&weight \
					size:&size ofFont:curr->font];\
				if (isFixedPitch) strcat(tmp, "<TT>");\
				if (curr->rFlags.underline) strcat(tmp, "<U>");\
				if (traits & NX_BOLD) strcat(tmp, "<B>");\
				if (traits & NX_ITALIC) strcat(tmp, "<I>");\
				if (*tmp) \{				// don't bother unless we got styles\
					fakeTag.start = NXUniqueString(tmp);\
					*tmp = 0;			// closers are appended in the reverse order\
					if (traits & NX_ITALIC) strcat(tmp, "</I>");\
					if (traits & NX_BOLD) strcat(tmp, "</B>");\
					if (curr->rFlags.underline) strcat(tmp, "</U>");\
					if (isFixedPitch) strcat(tmp, "</TT>");\
					fakeTag.end = NXUniqueString(tmp);\
					found = &fakeTag;\
				\}\
			\}\
			\
			if (found) \{				// stop previous tag, if differs\
				if (found->end != closer) \{\
					if (closer) \{ 		// transition\
						NXWrite(s, closer, strlen(closer));\
					\}\
					NXWrite(s, found->start, strlen(found->start));\
					closer = found->end;\
				\}\
			\} else if (closer) \{ 		// return to ground state\
				NXWrite(s, closer, strlen(closer));\
				closer = NULL;\
			\}\
		\} else \{\
			// Write out the annotation WITHOUT clearing the tagging state.\
			// MAJOR DESIGN CHANGE: SEE HISTORY & NOTES!!!! RK, 11/4\
			if ([curr->info respondsTo:@selector(writeHTML:forView:)]) \{\
				[curr->info writeHTML:s forView:self];\
			\}\
		\}\
		\
		// encode the text corresponding to the run\
		// mission is to write (cumulative) curr->chars chars beginning\
		// at currentOffset. boundaries may map onto > 1 block\
		targetOffset = currentOffset + curr->chars;\
		// consume full blocks\
		while ((currBlock) && (targetOffset >= (currBlock->chars))) \{\
			// Guard on the length actually handed to the encoder: a length of 0\
			// means "use strlen" to HTMLEncoder, which must never happen for\
			// block text (an empty block used to slip through here).\
			span = currBlock->chars - currentOffset;\
			if (!(curr->info) && (span > 0))	// throw annotated bits in bucket\
				HTMLEncoder(s, currBlock->text+currentOffset, span);\
			targetOffset-=currBlock->chars;\
			currBlock=currBlock->next;\
			currentOffset=0;\
		\}\
		// consume partial block\
		span = targetOffset - currentOffset;\
		if (currBlock && (! curr->info) && (span > 0))	// don't pass len=0 to encoder\
			HTMLEncoder(s, currBlock->text + currentOffset, span);\
		currentOffset=targetOffset;\
		curr++;\
	\}	\
	if (closer) \{					// close whatever style is still open\
		NXWrite(s, closer, strlen(closer));\
		closer = NULL;\
	\}\
	return self;\
\}\
\
//	Write the receiver's text to s as LaTeX and return self.\
//	Tag selection works as in writeHTML:withTags: (first entry of tags whose font\
//	equals the run's font, else a pair synthesized from the font's traits), but\
//	a style change is detected by comparing the OPENER with the one last\
//	written, because every synthesized LaTeX closer is made of the same braces.\
//	Annotated runs first close the open style, then write themselves if they\
//	respond to writeLaTeX:forView:; their own characters are skipped.\
- writeLaTeX:(NXStream *) s withTags:(taggingInfo *) tags \{\
	int				k, N;\
	NXRun			*curr;\
	NXTextBlock		*currBlock;\
	int				currentOffset, targetOffset, span;\
	NXAtom			closer;			// closing string of the currently open style, or NULL\
	NXAtom			oldStart=NULL;	// opener of the currently open style, or NULL\
	taggingInfo		*aTag, *found;\
	taggingInfo		fakeTag;		// holds a synthesized physical-style pair\
	id				fm;				// FontManager\
	\
	N = theRuns->chunk.used/sizeof(NXRun);\
	curr = theRuns->runs;\
	currBlock = [self firstTextBlock];\
	currentOffset = 0;\
	closer = NULL;\
	fm = [FontManager new];\
	\
	for (k=0; k < N; k++) \{\
		if (curr->info == NULL) \{\
			// Encode the state for this run.\
			// First, is this a tagged run? If so, do we need to close the\
			// previous state?\
			aTag = tags; found = NULL;\
			while (!found && aTag && aTag->font) \{\
				if (curr->font == aTag->font) found = aTag;\
				aTag++;\
			\}\
			\
			if (!found) \{				// search for physical tags\
				char tmp[32],*family;\
				NXFontTraitMask traits;\
				int weight;\
				float size;\
				BOOL isFixedPitch;\
				\
				*tmp = 0;\
				isFixedPitch = 		// primitive monospacing test\
					([curr->font metrics])->isFixedPitch;\
				[fm getFamily:&family traits:&traits weight:&weight \
					size:&size ofFont:curr->font];\
				if (isFixedPitch) strcat(tmp, "\{\\\\tt ");\
				if (curr->rFlags.underline) strcat(tmp, "\\\\underline\{");\
				if (traits & NX_BOLD) strcat(tmp, "\{\\\\bf ");\
				if (traits & NX_ITALIC) strcat(tmp, "\{\\\\it ");\
				if (*tmp) \{				// don't bother unless we got styles\
					fakeTag.start = NXUniqueString(tmp);\
					*tmp = 0;			// one closing brace per opener\
					if (traits & NX_ITALIC) strcat(tmp, "\}");\
					if (traits & NX_BOLD) strcat(tmp, "\}");\
					if (curr->rFlags.underline) strcat(tmp, "\}");\
					if (isFixedPitch) strcat(tmp, "\}");\
					fakeTag.end = NXUniqueString(tmp);\
					found = &fakeTag;\
				\}\
			\}\
			\
			if (found) \{				// stop previous tag, if differs\
				// how can we tell if the state has changed?\
				// the assumption is that a run necessarily corresponds\
				// to a change of style -- but a colorchange wouldn't.\
				// with HTML, the exact closer string would be unique\
				// for LaTeX the heuristic is that every _opener_ is unique\
				// thus, if the opener is unchanged, we short-circuit the\
				// close-reopen pair.\
				if (found->start != oldStart) \{\
					if (closer) \{ 		// transition\
						NXWrite(s, closer, strlen(closer));\
					\}\
					NXWrite(s, found->start, strlen(found->start));\
					closer = found->end;\
					oldStart = found->start;\
				\}\
			\} else \{					// return to ground state\
				if (closer) \{\
					NXWrite(s, closer, strlen(closer));\
					closer = NULL;\
				\}\
				// Nothing is open any more. Without this reset a later run with\
				// the same opener would be taken for "unchanged" and never opened.\
				oldStart = NULL;\
			\}\
		\} else \{\
			// clear tagging state, write out the annotation\
			if (closer) \{\
				NXWrite(s, closer, strlen(closer));\
				closer = NULL;\
			\}\
			oldStart = NULL;			// style was closed: force a re-open afterwards\
			if ([curr->info respondsTo:@selector(writeLaTeX:forView:)]) \{\
				[curr->info writeLaTeX:s forView:self];\
			\}\
		\}\
		\
		// encode the text corresponding to the run\
		// mission is to write (cumulative) curr->chars chars beginning\
		// at currentOffset. boundaries may map onto > 1 block\
		targetOffset = currentOffset + curr->chars;\
		// consume full blocks\
		while ((currBlock) && (targetOffset >= (currBlock->chars))) \{\
			// Guard on the length actually handed to the encoder: a length of 0\
			// means "use strlen" to LaTeXEncoder, which must never happen for\
			// block text (an empty block used to slip through here).\
			span = currBlock->chars - currentOffset;\
			if (!(curr->info) && (span > 0))	// throw annotated bits in bucket\
				LaTeXEncoder(s, currBlock->text+currentOffset, span);\
			targetOffset-=currBlock->chars;\
			currBlock=currBlock->next;\
			currentOffset=0;\
		\}\
		// consume partial block\
		span = targetOffset - currentOffset;\
		if (currBlock && (! curr->info) && (span > 0))	// don't pass len=0 to encoder\
			LaTeXEncoder(s, currBlock->text + currentOffset, span);\
		currentOffset=targetOffset;\
		curr++;\
	\}	\
	if (closer) \{					// close whatever style is still open\
		NXWrite(s, closer, strlen(closer));\
		closer = NULL;\
	\}\
	return self;\
\}\
\
//-----------------------------------------------------------------------------\
//	
\b String Operators
\b0 \
//\
//	Class-level entry point to HTMLEncoder(): encodes theChars onto s and\
//	returns self. A len of 0 makes the encoder measure theChars with strlen().\
+ encodeHTML:(NXStream *) s from: (unsigned char *) theChars length: (int)len\
\{\
	HTMLEncoder(s, theChars, len);\
	return self;\
\}\
//	Class-level entry point to LaTeXEncoder(): encodes theChars onto s and\
//	returns self. A len of 0 makes the encoder measure theChars with strlen().\
+ encodeLaTeX:(NXStream *) s from: (unsigned char *) theChars length:(int)len\
\{\
	LaTeXEncoder(s, theChars, len);\
	return self;\
\}\
//	Class-level entry point to URIEncoder(): encodes theChars onto s and\
//	returns self. A len of 0 makes the encoder measure theChars with strlen().\
+ encodeURI:(NXStream *) s from: (unsigned char *) theChars length:(int)len\
\{\
	URIEncoder(s, theChars, len);\
	return self;\
\}\
\
//-----------------------------------------------------------------------------\
//	
\b HTMLEncoder API
\b0 \
//\
//	Forget the active HTML encoding table and return self.\
+ flushHTMLEncoding\
\{\
	// HTMLEncoder() reloads the table (according to the UserModel) whenever\
	// it finds this pointer NULL, so the next access forces a reload.\
	currentHTMLEncoding = NULL;\
	return self;\
\}\
\
@end\
\
//-----------------------------------------------------------------------------\
//	
\b Encoder API
\b0 \
//	
\i note that these are file-globals, and thus apply to the
\i0 \
//	
\i entire eText process; encodings are not chosen on a per-document basis
\i0 .\
//\
\
// Built-in table, one string per entity value; declared here so HTMLEncoder()\
// can fall back to it, defined with its contents at the end of this file.\
const char		*
\b defaultHTMLEncoding
\b0 [
\b ENTITIES
\b0 ];\
// Table in use. NULL means "not loaded": HTMLEncoder() (re)loads it on its\
// next call, which is how +flushHTMLEncoding takes effect.\
char 			**
\b currentHTMLEncoding
\b0 =NULL;\
// String storage behind a table read from a file; assigned by HTMLEncoder()\
// only when a file was loaded successfully.\
char			*
\b currentBuffer
\b0 ;\
// Byte length of each entry of currentHTMLEncoding, used as the NXWrite() count.\
unsigned char 	*
\b currentHTMLEncodingLength
\b0 ;\
\
//	Write item to stream, replacing every byte by its entry in the active\
//	HTML encoding table.\
//	  stream	destination stream\
//	  item		bytes to encode\
//	  len		number of bytes; 0 means item is NUL-terminated and is measured\
//				with strlen()\
//	When no table is active (first use, or after +flushHTMLEncoding) the table\
//	named by the UserModel's "HTMLEncoding" entry is looked up in the main\
//	bundle and read; if that fails defaultHTMLEncoding is used instead.\
//	Tables that are replaced are not freed (a small, deliberate leak).\
void HTMLEncoder(NXStream *stream, unsigned char *item, int len) \{\
	int i;\
	\
	if (!currentHTMLEncoding) \{		// Hence the +flushHTMLEncoding\
		char HTMLResourceFilePath[MAXPATHLEN];\
		\
		// Start from an empty path: if the bundle lookup fails the buffer may\
		// be left untouched, and the test below must not read garbage.\
		HTMLResourceFilePath[0] = 0;\
		[[NXBundle mainBundle] getPath:HTMLResourceFilePath \
				forResource:[userModel stringQuery:"HTMLEncoding"]\
				ofType:ENCD_EXT];\
		if (*HTMLResourceFilePath) \{	// try to load from this path\
			unsigned char 	*tempBuffer;\
			unsigned char 	*tempLengths;\
			char 			**tempEncoding;\
			\
			tempBuffer = malloc(4*MAXPATHLEN*sizeof(unsigned char));\
						  // The above is C Programmer's Disease\
			tempLengths = malloc(ENTITIES * sizeof(unsigned char));\
			tempEncoding = malloc(ENTITIES * sizeof(char *));\
			if (tempBuffer && tempLengths && tempEncoding &&\
				readEncodingTableFromFile(HTMLResourceFilePath, tempEncoding,\
								tempLengths, (char *)tempBuffer)) \{\
				// no freeing if not defaultEncoding\
				// is a memory leak no one cares about\
				currentHTMLEncodingLength = tempLengths;\
				currentHTMLEncoding = tempEncoding;\
				currentBuffer = (char *)tempBuffer;\
			\} else \{\
				if (tempBuffer) free(tempBuffer);\
				if (tempLengths) free(tempLengths);\
				if (tempEncoding) free(tempEncoding);\
				NXLogError("Could not read encoding data from %s",\
							HTMLResourceFilePath);\
			\}\
		\} \
	\}\
		\
	if (!currentHTMLEncoding) \{		// Error fall-through\
		// "use" the defaultEncoding.\
		currentHTMLEncodingLength=malloc(ENTITIES * sizeof(unsigned char));\
		if (!currentHTMLEncodingLength) return;	// out of memory: write nothing\
		currentHTMLEncoding = (char **)defaultHTMLEncoding;\
		for(i=0;i<ENTITIES;i++)\
			currentHTMLEncodingLength[i] = \
				(currentHTMLEncoding[i] ? strlen(currentHTMLEncoding[i]) : 0);\
	\}\
	\
	// The two "modes" of the Encoder, using the userData parameter\
	if(!len) len=strlen((char *)item);\
	\
	for (i=0; i<len; i++) \{\
		NXWrite(stream, currentHTMLEncoding[item[i]],\
				currentHTMLEncodingLength[item[i]]);\
	\}\
\}\
\
//	Write item to s in URI-escaped form.\
//	  s		destination stream\
//	  item	bytes to encode\
//	  len	number of bytes; 0 means item is NUL-terminated and is measured\
//			with strlen()\
//	Alphanumerics, the punctuation listed in the switch below, and a '%' that\
//	already starts a %XX escape are copied unchanged; every other byte is\
//	written as '%' followed by two hexadecimal digits.\
void URIEncoder(NXStream *s, unsigned char *item, int len) \{\
	int i;\
	\
	// The two "modes" of the Encoder, using the userData parameter\
	if(!len) len=strlen((char *)item);\
\
	for (i=0; i<len; i++) \{\
		unsigned ch = item[i];\
		// An existing escape is '%' plus two HEX digits, and both of them must\
		// lie inside the len bytes we were given.\
		if (isalnum(ch) || ((ch=='%') && (i+2 < len) &&\
				isxdigit(item[i+1]) && isxdigit(item[i+2])))\
			NXPutc(s,ch);\
		else switch (ch) \{\
			case ':': \
			case '/': \
			case '\\\\': \
		// "safe" in RFC1630 BNF\
			case '$': \
			case '-': \
			case '_': \
			case '@': \
			case '.': \
			case '&': \
			case '+':\
			// "extra" in RFC1630 BNF\
			case '!': \
			case '*': \
			case '\\"': \
			case '\\'': \
			case '|': \
			case ',':\
						NXPutc(s,ch); break;\
			default:	// encode as %hex, always two digits (0x0a -> %0a)\
						NXPrintf(s,"%%%02x", ch); break;\
		\}\
	\}\
\}\
\
//	Write item to s with the characters that are special to LaTeX escaped.\
//	  s		destination stream\
//	  item	bytes to encode\
//	  len	number of bytes; 0 means item is NUL-terminated and is measured\
//			with strlen()\
//	A newline becomes a paragraph break; tabs and all remaining bytes are\
//	copied unchanged.\
\
// Write a string literal in full; sizeof keeps the byte count in step with\
// the text, so a replacement can no longer be truncated by a miscount.\
#define LATEX_PUT(str)	NXWrite(s, (str), sizeof(str) - 1)\
\
void LaTeXEncoder(NXStream *s, unsigned char *item, int len) \{\
	int i;\
	\
	// The two "modes" of the Encoder, using the userData parameter\
	if(!len) len=strlen((char *)item);\
\
	for (i=0; i<len; i++) \{\
		unsigned ch = item[i];\
		switch (ch) \{\
			case '<':	LATEX_PUT("$<$"); break;\
			case '>':	LATEX_PUT("$>$"); break;\
			case '\\\\':	LATEX_PUT("$\\\\backslash$"); break;\
			// \\~ and \\^ are accent commands: without the empty group they\
			// would put the accent on the character that follows.\
			case '~':	LATEX_PUT("\\\\~\{\}"); break;\
			case '^':	LATEX_PUT("\\\\^\{\}"); break;\
			case '\{':	LATEX_PUT("\\\\\{"); break;\
			case '\}':	LATEX_PUT("\\\\\}"); break;\
			case '%':	LATEX_PUT("\\\\%"); break;\
			case '#':	LATEX_PUT("\\\\#"); break;\
			case '_':	LATEX_PUT("\\\\_"); break;\
			case '&':	LATEX_PUT("\\\\&"); break;\
			case '$':	LATEX_PUT("\\\\$"); break;\
			case '\\n':	LATEX_PUT("\\\\par\\n"); break;	// whole string, not just its first byte\
			case '\\t':	// we should do something here for tabs\
			default:	// we should do something here for extended symbols\
						NXPutc(s, ch); break;\
		\}\
	\}\
\}\
\
#undef LATEX_PUT\
\
//	Read an encoding table from the file at path.\
//	  path				file to read\
//	  targetEncoding	receives ENTITIES string pointers (into targetBuffer)\
//	  targetLengths		receives the strlen() of each of those strings\
//	  targetBuffer		storage the decoded strings are copied into\
//	Returns YES if the file could be mapped, NO otherwise.\
//	File format, one entry per line, in entity order: a line whose first\
//	character is a double quote supplies the text up to the next unescaped\
//	double quote; backslash escapes follow the NXStringTable conventions.\
//	Any other line (for example a '#' comment or a blank line) is ignored.\
//	Entries the file does not supply are set to the empty string.\
//	NOTE(review): the capacity of targetBuffer is not passed in, so a table\
//	larger than the caller's allocation still overruns it -- confirm callers.\
BOOL readEncodingTableFromFile(const char *path, char **targetEncoding,\
								unsigned char *targetLengths,char*targetBuffer) \{\
	static char	emptyEntry[1] = \{ 0 \};	// shared "" for missing entries\
	NXStream	*s;\
	int			i,j,len,maxlen;\
	char		*theChars,*current;\
	char		c;\
		\
	s = NXMapFile(path, NX_READONLY);\
	if (!s) return NO;\
\
	NXGetMemoryBuffer(s,&theChars,&len, &maxlen);\
	i=j=0;\
	while (i<ENTITIES && (j < len)) \{\
		// Only look at the first character of the line, without consuming\
		// it: an empty line must not swallow the line that follows it.\
		if (theChars[j] == '"') \{		// we have a winner!\
			j++;\
			current = targetBuffer;\
			// stop at the closing quote, or at end of data if it is missing\
			while ((j < len) && (theChars[j] != '"')) \{\
				c = theChars[j++];\
				// heuristics identical to NXStringTable\
				if ((c == '\\\\') && (j < len)) \{\
					c = theChars[j++];\
					switch (c) \{\
						case 'n': c = '\\n'; break;\
						case 't': c = '\\t'; break;\
						case 'a': c = '\\a'; break;\
						case 'b': c = '\\b'; break;\
						case 'f': c = '\\f'; break;\
						case 'r': c = '\\r'; break;\
						case 'v': c = '\\v'; break;\
						default: break;	// backslash, quote, anything else: itself\
					\}\
				\}\
				*(targetBuffer++) = c;\
			\}\
			*(targetBuffer++)=0;\
			targetEncoding[i]=current;\
			targetLengths[i]=strlen(current);\
			i++;\
		\}\
		// consume until EOL (comments, unrecognized lines, rest of an entry)\
		while ((j<len) && (theChars[j++] != '\\n'));\
	\}\
	// A short file must not leave uninitialized pointers in the table.\
	for (; i<ENTITIES; i++) \{\
		targetEncoding[i]=emptyEntry;\
		targetLengths[i]=0;\
	\}\
	NXCloseMemory(s, NX_FREEBUFFER);\
	return YES;\
\}\
\
\
// Built-in HTML encoding: entry N is the text written for the byte value N.\
// Every character entity reference carries its terminating ';' so that a\
// letter following it in the output cannot be read as part of the name.\
// NOTE(review): entries from 0x80 up presumably follow the NeXTSTEP character\
// encoding -- confirm against the encoding tables.\
const char * defaultHTMLEncoding[ENTITIES] = \{\
	"",				/* NUL */\
	"",				/* SOH */\
	"",				/* STX */\
	"",				/* ETX */\
	"",				/* EOT */\
	"",				/* ENQ */\
	"",				/* ACK */\
	"",				/* BEL */\
	"",				/* BS */\
	"\\t",			/* TAB */\
	"<BR>\\n",		/* NEWLINE */\
	"",				/* VT */\
	"",				/* FF */\
	"\\r",			/* CR */\
	"",				/* SO */\
	"",				/* SI */\
	"",				/* DLE */\
	"",				/* DC1 (XON)*/\
	"",				/* DC2 */\
	"",				/* DC3 (XOFF)*/\
	"",				/* DC4 */\
	"",				/* NAK */\
	"",				/* SYN */\
	"",				/* ETB */\
	"",				/* CAN */\
	"",				/* EM */\
	"",				/* SUB */\
	"",				/* ESC */\
	"",				/* FS */\
	"",				/* GS */\
	"",				/* RS */\
	"",				/* US */\
	" ",			/* SPACE */\
	"!",\
	"&quot;",\
	"#",\
	"$",\
	"%",\
	"&amp;",\
	"'",\
	"(",\
	")",\
	"*",\
	"+",\
	",",\
	"-",\
	".",\
	"/",\
	"0",\
	"1",\
	"2",\
	"3",\
	"4",\
	"5",\
	"6",\
	"7",\
	"8",\
	"9",\
	":",\
	";",\
	"&lt;",\
	"=",\
	"&gt;",\
	"?",\
	"@",\
	"A",\
	"B",\
	"C",\
	"D",\
	"E",\
	"F",\
	"G",\
	"H",\
	"I",\
	"J",\
	"K",\
	"L",\
	"M",\
	"N",\
	"O",\
	"P",\
	"Q",\
	"R",\
	"S",\
	"T",\
	"U",\
	"V",\
	"W",\
	"X",\
	"Y",\
	"Z",\
	"[",\
	"\\\\",\
	"]",\
	"^",\
	"_",\
	"`",\
	"a",\
	"b",\
	"c",\
	"d",\
	"e",\
	"f",\
	"g",\
	"h",\
	"i",\
	"j",\
	"k",\
	"l",\
	"m",\
	"n",\
	"o",\
	"p",\
	"q",\
	"r",\
	"s",\
	"t",\
	"u",\
	"v",\
	"w",\
	"x",\
	"y",\
	"z",\
	"\{",\
	"|",\
	"\}",\
	"~",\
	"",				/* DEL */\
	"&nbsp;",		// 0x80\
	"&Agrave;",\
	"&Aacute;",\
	"&Acirc;",\
	"&Atilde;",\
	"&Auml;",\
	"&Aring;",\
	"&Ccedil;",\
	"&Egrave;",\
	"&Eacute;",\
	"&Ecirc;",\
	"&Euml;",\
	"&Igrave;",\
	"&Iacute;",\
	"&Icirc;",\
	"&Iuml;",\
	"&ETH;", 		// 0x90\
	"&Ntilde;",\
	"&Ograve;",\
	"&Oacute;",\
	"&Ocirc;",\
	"&Otilde;",\
	"&Ouml;",\
	"&Ugrave;",\
	"&Uacute;",\
	"&Ucirc;",\
	"&Uuml;",\
	"&Yacute;",\
	"&THORN;",\
	"mu",\
	" x ",\
	" / ",\
	"(c)",			// 0xA0\
	" ! ",\
	" cents ",\
	" Pound ",\
	"/",\
	" Yen ",\
	" Florin ",\
	" Section ",\
	" Currency ",\
	"'",\
	"``",\
	"&lt;&lt;",\
	"&lt;",\
	"&gt;",\
	"fi",\
	"fl",\
	"(R)", 			// 0xB0\
	"-",\
	" * ", 			// dagger\
	" ** ", 		// dubdagger\
	" . ", 			// centered period\
	" | ", 			// broken pipe\
	" P ", 			// Paragraph\
	"*", 			// bullet\
	",", 			// low-quote\
	",,", 			// low dubquote\
	"''", 			// up dub\
	"&gt;&gt;",\
	"...",\
	"%%", 			// per thousand\
	"!", 			// not!\
	"?", 			// upside down ? \
	"1", 			// 0xC0\
	"`",\
	"'",\
	"^",\
	"~",\
	"-", 			// macron\
	"\\\\/", 			// breve\
	".", \
	"..", 			// uml\
	"2",\
	"o",\
	",", 			// cedilla\
	"3",\
	"''", \
	",", 			// backward cedilla\
	"\\\\/", 			// caron\
	"--", 			// 0xD0\
	"+/-",\
	"(1/4)",\
	"(1/2)",\
	"(3/4)",\
	"&agrave;",\
	"&aacute;",\
	"&acirc;",\
	"&atilde;",\
	"&auml;",\
	"&aring;",\
	"&ccedil;",\
	"&egrave;",\
	"&eacute;",\
	"&ecirc;",\
	"&euml;",\
	"&igrave;", 	//0xE0\
	"&AElig;",\
	"&iacute;",\
	"a",\
	"&icirc;",\
	"&iuml;",\
	"&eth;",\
	"&ntilde;",\
	"L",\
	"&Oslash;",\
	"OE",\
	"o",\
	"&ograve;",\
	"&oacute;",	\
	"&ocirc;",\
	"&otilde;",\
	"&ouml;",\
	"&aelig;", \
	"&ugrave;",\
	"&uacute;",\
	"&ucirc;", \
	"i",\
	"&uuml;",\
	"&yacute;",\
	"l", \
	"&oslash;",\
	"oe",\
	"B",\
	"&thorn;", \
	"&yuml;",\
	"",\
	""\
\};
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.