stxncpy.S source code [glibc/sysdeps/alpha/stxncpy.S]

/* Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
17
/* Copy no more than COUNT bytes of the null-terminated string from
   SRC to DST.

   This is an internal routine used by strncpy, stpncpy, and strncat.
   As such, it uses special linkage conventions to make implementation
   of these public functions more efficient.

   On input:
	t9 = return address
	a0 = DST
	a1 = SRC
	a2 = COUNT

   Furthermore, COUNT may not be zero.

   On output:
	t0  = last word written
	t8  = bitmask (with one bit set) indicating the last byte written
	t10 = bitmask (with one bit set) indicating the byte position of
	      the end of the range specified by COUNT
	a0  = unaligned address of the last *word* written
	a2  = the number of full words left in COUNT

   Furthermore, v0, a3-a5, t11, and t12 are untouched.
*/
43
44
/* This is generally scheduled for the EV5, but should still be pretty
   good for the EV4 too.  */
47
#include <sysdep.h>

/* Assembler mode and symbol set-up for __stxncpy.  The register aliases
   (t0, a0, ...) and the cfi_* macros used in this file are not defined
   here; presumably they are provided by <sysdep.h> -- confirm there.  */

	.set noat
	.set noreorder

	.text
	.type	__stxncpy, @function
	.globl	__stxncpy
	.usepv	__stxncpy, no

	cfi_startproc
	/* This routine is entered with its return address in t9 and leaves
	   via "ret (t9)", so t9 is named as the return column.  */
	cfi_return_column (t9)
60
/* Co-aligned copy path: first word, main loop and final partial store.
   Reached from __stxncpy when SRC and DST share the same low three
   address bits.  Leaves through "ret (t9)".

   On entry to this basic block:
   t0 == the first destination word for masking back in
   t1 == the first source word.  */
	.align 3
stxncpy_aligned:
	/* Create the 1st output word and detect 0's in the 1st input word.  */
	lda	t2, -1		# e1    : build a mask against false zero
	mskqh	t2, a1, t2	# e0    :   detection in the src word
	mskqh	t1, a1, t3	# e0    :
	ornot	t1, t2, t2	# .. e1 :
	mskql	t0, a1, t0	# e0    : assemble the first output word
	cmpbge	zero, t2, t7	# .. e1 : bits set iff null found
	or	t0, t3, t0	# e0    :
	beq	a2, $a_eoc	# .. e1 :
	bne	t7, $a_eos	# .. e1 :

	/* On entry to this basic block:
	   t0 == a source word not containing a null.  */
$a_loop:
	stq_u	t0, 0(a0)	# e0    :
	addq	a0, 8, a0	# .. e1 :
	ldq_u	t0, 0(a1)	# e0    :
	addq	a1, 8, a1	# .. e1 :
	subq	a2, 1, a2	# e0    :
	cmpbge	zero, t0, t7	# .. e1 (stall)
	beq	a2, $a_eoc	# e1    :
	beq	t7, $a_loop	# e1    :

	/* Take care of the final (partial) word store.  At this point
	   the end-of-count bit is set in t7 iff it applies.

	   On entry to this basic block we have:
	   t0 == the source word containing the null
	   t7 == the cmpbge mask that found it.  */
$a_eos:
	negq	t7, t8		# e0    : find low bit set
	and	t7, t8, t8	# e1 (stall)

	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  (Bit 0x80 set in t8 means the
	   terminating byte is the last byte of the word, so the whole
	   word is stored and nothing of the old destination survives.)  */
	and	t8, 0x80, t6	# e0    :
	bne	t6, 1f		# .. e1 (zdb)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t1, 0(a0)	# e0    :
	subq	t8, 1, t6	# .. e1 :
	or	t8, t6, t7	# e0    : t7 = byte mask up to and incl. last byte
	unop			#
	zapnot	t0, t7, t0	# e0    : clear src bytes > null
	zap	t1, t7, t1	# .. e1 : clear dst bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# e1    :

	/* Add the end-of-count bit to the eos detection bitmask.  */
$a_eoc:
	or	t10, t7, t7
	br	$a_eos
121
/* Entry point.  Per the linkage notes at the top of this file:
   a0 = DST, a1 = SRC, a2 = COUNT (non-zero), t9 = return address.
   Computes the word loop counter (a2) and the end-of-count byte bit
   (t10), then dispatches to the co-aligned or the unaligned copy.  */
	.align 3
__stxncpy:
	/* Are source and destination co-aligned?  */
	lda	t2, -1
	xor	a0, a1, t1
	srl	t2, 1, t2		# t2 = -1 >> 1 (logical), i.e. LONG_MAX
	and	a0, 7, t0		# find dest misalignment
	cmovlt	a2, t2, a2		# bound neg count to LONG_MAX
	and	t1, 7, t1		# non-zero iff low address bits differ
	addq	a2, t0, a2		# bias count by dest misalignment
	subq	a2, 1, a2
	and	a2, 7, t2
	srl	a2, 3, a2		# a2 = loop counter = (count - 1)/8
	addq	zero, 1, t10
	sll	t10, t2, t10		# t10 = bitmask of last count byte
	bne	t1, $unaligned

	/* We are co-aligned; take care of a partial first word.  */

	ldq_u	t1, 0(a1)	# e0    : load first src word
	addq	a1, 8, a1	# .. e1 :

	beq	t0, stxncpy_aligned	# avoid loading dest word if not needed
	ldq_u	t0, 0(a0)	# e0    :
	br	stxncpy_aligned	# .. e1 :
147
148
/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, unmasked
	   t1 == the shifted low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes */

	ldq_u	t2, 8(a1)	# e0    : load second src word
	addq	a1, 8, a1	# .. e1 :
	mskql	t0, a0, t0	# e0    : mask trailing garbage in dst
	extqh	t2, a1, t4	# e0    :
	or	t1, t4, t1	# e1    : first aligned src word complete
	mskqh	t1, a0, t1	# e0    : mask leading garbage in src
	or	t0, t1, t0	# e0    : first output word complete
	or	t0, t6, t6	# e1    : mask original data for zero test
	cmpbge	zero, t6, t7	# e0    :
	beq	a2, $u_eocfin	# .. e1 :
	lda	t6, -1		# e0    :
	bne	t7, $u_final	# .. e1 :

	mskql	t6, a1, t6	# e0    : mask out bits already seen
	nop			# .. e1 :
	stq_u	t0, 0(a0)	# e0    : store first output word
	or	t6, t2, t2	# .. e1 :
	cmpbge	zero, t2, t7	# e0    : find nulls in second partial
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e0    :
	bne	t7, $u_late_head_exit	# .. e1 :

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */

	extql	t2, a1, t1	# e0    : position hi-bits of lo word
	beq	a2, $u_eoc	# .. e1 :
	ldq_u	t2, 8(a1)	# e0    : read next high-order source word
	addq	a1, 8, a1	# .. e1 :
	extqh	t2, a1, t0	# e0    : position lo-bits of hi word
	cmpbge	zero, t2, t7	# .. e1 : test new word for eos
	nop			# e0    :
	bne	t7, $u_eos	# .. e1 :
	/* Falls through into the unaligned main loop.  */
197
/* Unaligned copy main loop.  In order to avoid reading too much,
   the loop is structured to detect zeros in aligned source words.
   This has, unfortunately, effectively pulled half of a loop
   iteration out into the head and half into the tail, but it does
   prevent nastiness from accumulating in the very thing we want
   to run as fast as possible.

   On entry to this basic block:
   t0 == the shifted low-order bits from the current source word
   t1 == the shifted high-order bits from the previous source word
   t2 == the unshifted current source word

   We further know that t2 does not contain a null terminator.

   Exits: to $u_eoc when the word count in a2 reaches zero, or by
   falling out of the bottom when the newly loaded word holds a null.  */

	.align 3
$u_loop:
	or	t0, t1, t0	# e0    : current dst word now complete
	subq	a2, 1, a2	# .. e1 : decrement word count
	stq_u	t0, 0(a0)	# e0    : save the current word
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t1	# e0    : extract high bits for next time
	beq	a2, $u_eoc	# .. e1 :
	ldq_u	t2, 8(a1)	# e0    : load high word for next time
	addq	a1, 8, a1	# .. e1 :
	nop			# e0    :
	cmpbge	zero, t2, t7	# .. e1 : test new word for eos
	extqh	t2, a1, t0	# e0    : extract low bits for current word
	beq	t7, $u_loop	# .. e1 :
226
/* We've found a zero somewhere in the source word we just read.
   If it resides in the lower half, we have one (probably partial)
   word to write out, and if it resides in the upper half, we
   have one full and one partial word left to write out.

   On entry to this basic block:
   t0 == the shifted low-order bits from the current source word
   t1 == the shifted high-order bits from the previous source word
   t2 == the unshifted current source word.  */
$u_eos:
	or	t0, t1, t0	# e0    : first (partial) source word complete
	cmpbge	zero, t0, t7	# e0    : is the null in this first bit?
	bne	t7, $u_final	# .. e1 (zdb)

	stq_u	t0, 0(a0)	# e0    : the null was in the high-order bits
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e0    :

	/* Also entered directly from the head code.  Builds the last word
	   from the high part of t2; when no full words remain (a2 == 0)
	   the end-of-count bit t10 is merged into the null mask.  */
$u_late_head_exit:
	extql	t2, a1, t0	# e0    :
	cmpbge	zero, t0, t7	# e0    :
	or	t7, t10, t6	# e1    :
	cmoveq	a2, t6, t7	# e0    :

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t0 == assembled source word
	   t7 == cmpbge mask that found the null.  */
$u_final:
	negq	t7, t6		# e0    : isolate low bit set
	and	t6, t7, t8	# e1    :

	and	t8, 0x80, t6	# e0    : avoid dest word load if we can
	bne	t6, 1f		# .. e1 (zdb)

	ldq_u	t1, 0(a0)	# e0    :
	subq	t8, 1, t6	# .. e1 :
	or	t6, t8, t7	# e0    :
	zapnot	t0, t7, t0	# .. e1 : kill source bytes > null
	zap	t1, t7, t1	# e0    : kill dest bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :
271
/* Got to end-of-count before end of string.
   On entry to this basic block:
   t1 == the shifted high-order bits from the previous source word  */
$u_eoc:
	/* Shift the end-of-count bit left by the source misalignment.  If
	   it is still within the low eight bits, the 1f path is taken and
	   t1 alone is used as the final word, with no further load.  */
	and	a1, 7, t6	# e1    :
	sll	t10, t6, t6	# e0    :
	and	t6, 0xff, t6	# e0    :
	bne	t6, 1f		# e1    : avoid src word load if we can

	ldq_u	t2, 8(a1)	# e0    : load final src word
	nop			# .. e1 :
	extqh	t2, a1, t0	# e0    : extract high bits for last word
	or	t1, t0, t1	# e1    :

1:	cmpbge	zero, t1, t7
	mov	t1, t0

$u_eocfin:			# end-of-count, final word
	or	t10, t7, t7
	br	$u_final
292
/* Unaligned copy entry point.  Reached from __stxncpy when the low
   three address bits of SRC and DST differ.  Either hands over to
   $u_head or, when the string (or the count) ends inside the first
   partial source word, finishes the copy here and returns.  */
	.align 3
$unaligned:

	ldq_u	t1, 0(a1)	# e0    : load first source word

	and	a0, 7, t4	# .. e1 : find dest misalignment
	and	a1, 7, t5	# e0    : find src misalignment

	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.  */

	mov	zero, t0	# .. e1 :
	mov	zero, t6	# e0    :
	beq	t4, 1f		# .. e1 :
	ldq_u	t0, 0(a0)	# e0    :
	lda	t6, -1		# .. e1 :
	mskql	t6, a0, t6	# e0    :
1:
	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  */

	cmplt	t4, t5, t8	# e1    :
	extql	t1, a1, t1	# .. e0 : shift src into place
	lda	t2, -1		# e0    : for creating masks later
	beq	t8, $u_head	# e1    :

	mskqh	t2, t5, t2	# e0    : begin src byte validity mask
	cmpbge	zero, t1, t7	# .. e1 : is there a zero?
	extql	t2, a1, t2	# e0    :
	or	t7, t10, t5	# .. e1 : test for end-of-count too
	cmpbge	zero, t2, t3	# e0    :
	cmoveq	a2, t5, t7	# .. e1 :
	andnot	t7, t3, t7	# e0    :
	beq	t7, $u_head	# .. e1 (zdb)

	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.)  */

	ldq_u	t0, 0(a0)	# e0    :
	negq	t7, t6		# .. e1 : build bitmask of bytes <= zero
	mskqh	t1, t4, t1	# e0    :
	and	t6, t7, t8	# .. e1 :
	subq	t8, 1, t6	# e0    :
	or	t6, t8, t7	# e1    :

	zapnot	t2, t7, t2	# e0    : prepare source word; mirror changes
	zapnot	t1, t7, t1	# .. e1 : to source validity mask

	andnot	t0, t2, t0	# e0    : zero place for source to reside
	or	t0, t1, t0	# e1    : and put it there
	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	cfi_endproc
352

source code of glibc/sysdeps/alpha/stxncpy.S