-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
pilx86.e
16732 lines (16236 loc) · 747 KB
/
pilx86.e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
--
-- pilx86.e
-- (created 14/3/09, from pilxl.e+pgscan.e+pltype.e)
--
-- implements ilxlate(), the translation of intermediate code into x86 binary.
--
--EXCEPT
--Xconstant DBGCALL = 0 -- Phix implements call/return as mov [ebp+16],<return addr>;
-- DBGCALL: debugging aid, selects how routine calls are emitted (0 = jmp, 1 = call).
constant DBGCALL = 0 -- Phix implements call/return as mov [ebp+28],<return addr>;
-- jmp trn_il, to avoid system stack limitations/fiddlies,
-- but that makes working in OllyDbg etc harder. Setting this
-- to 1 replaces the jmp with call;add esp,4 (X32 only).
-- debugleak: memory leak diagnostics; the value is fixed by the backend (see NB below).
constant debugleak = 01 -- Backend diagnostic option, checks that the number of
-- bytes allocated and the number of bytes freed match.
-- (nb internal routines only, does not cover explicit
-- calls to allocate() and free(), due to things
-- like mem0 = allocate(4) in database.e)
-- When this is 1, emit a slew of opCleanup to facilitate
-- the test, ie deref all referenced vars to expose the
-- presence of any orphaned stuff.
-- NB Must match p.exe, read in or running in memory;
-- you should NOT try to change this setting.
-- I was going to switch this off, to gain a little extra
-- speed, for all live releases, but it makes so little
-- difference it may as well be left on all the time.
-- You can switch off opCleanup by ending the main program
-- with abort(0) [at the top level], if that helps any.
-- J_SWITCHED: sentinel stored in place of a jump offset (not a real offset).
constant J_SWITCHED = -53174 -- an opJne/eq/if/not x86 offset when it is not
-- a real jump but instead the corresponding jump
-- table entry was updated - with the address of
-- where the jump would have been, rather than
-- the address the jump would have jumped to, btw.
-- (btw: -53174 is a naff/"leet" "swit(c)h").
-- in pglobals.e:
--constant newBase = 2 -- 0 = old style, with base @ ref*4-20 on seq/str
-- 2 = new style, with slack @ ref*4-20 for seq only.
-- (nb: an error occurs if debugleak and newBase do not
-- match the backend.)
--
-- Technical note / Vague-ness-ism warning.
-- ========================================
-- The concept of "take part info" is rather vague, although it does produce
-- better results. While I am fairly clear in my own head of the effects of
-- getting it wrong, I cannot really say the same about why it is right, or
-- adequately express its place within a repeated iterative analysis.
--
-- This only ever occurs when a variable is on both sides of an assignment.
-- A statement such as v = v&x (along with several other forms of concat,
-- append, prepend, etc) can rely on the existing element types of v, just
-- as long as we know/have proved that v cannot be an atom. This prevents
-- a later eg v = 1.5 from "forgetting that elements of v can be floats".
-- In practice, if we're confident about the element types being ok, then
-- we can be sure all the rest (well, gtype and length) are too. In some
-- cases it can also reduce the number of iterations needed.
--
-- If any of this vagueness worries you, think:
-- * about repeated iterations through the code.
-- * order of code analysis may differ from order of execution.
-- * using info learnt on earlier passes to improve each iteration.
-- * never assuming any info we don't yet fully have.
--
-- Hopefully someone else can explain all this a bit more clearly, and
-- maybe even create a better maxim than "not an atom".
--
--
--constant cstats=01
----integer s1,s1i,s1is,s1ip,stot
---- s1 = 0
---- s1i = 0
---- s1is = 0
---- s1ip = 0
--integer a,ai,aiii,atot
-- a = 0 ai = 0 aiii = 0
--global procedure show_cstats()
-- if cstats then
------stot = s1+s1i+s1is+s1ip
------printf(1,"opSubse1:%d[%3.2f%%], opSubse1i:%d[%3.2f%%], opSubse1is:%d[%3.2f%%], opSubse1ip:%d[%3.2f%%]\n",
------ {s1,s1/stot, s1i,s1i/stot, s1is,s1is/stot, s1ip,s1ip/stot})
--atot = a+ai+aiii
--printf(1,"opAdd:%d[%3.2f%%], opAddi:%d[%3.2f%%], opAddiii:%d[%3.2f%%]\n",
-- {a,a/atot, ai,ai/atot, aiii,aiii/atot})
-- end if
--end procedure
----DEV not Phix...
--function o(integer d1, integer d2, integer d3)
---- octal digit, eg o(3,7,7) returns #FF aka 255
-- return d1*#40+d2*#08+d3 -- d1*0o100+d2*0o10+d3
--end function
--
-- x86 instruction opcodes, minimal required set.
-- =============================================
--
-- The use of octal exposes some of the xrm and sib settings.
-- Credit/blame: I mainly used FASM and OllyDbg to determine the instruction encodings.
-- TIP: run "p -d test.exw" (or better yet "p.exe p.exw -d test.exw", to test mods out
-- before actually attempting to rebuild the compiler) and examine list.asm to verify
-- the binary disassembles correctly. (If there are any idiots in the house, test.exw
-- is something to make yourself.) If you add more instructions here, they probably
-- are already supported by p2asm.e, except perhaps some recentish P6+/64-bit stuff.
-- Please note that these constants are really intended to make mistakes easier to spot,
-- rather than (although I hope they help a bit) trying to make x86 binary easy, which
-- is of course an almost impossible task, given how weird & convoluted it tends to be.
-- Some items may be badly named, eg "cmp_edx_sib" should probably be "cmp_edx_sibd8".
-- Some instructions have an opcode extension in the xrm, eg shl/shr/sar/etc are all
-- #D1,xSm with S=4/5/7 respectively. These constants are not really a suitable way to
-- encode such instructions, so instead they are just coded using inline hex directly.
-- (update: regimm365 & m_constants do one small subset of such instructions, btw.)
-- Commented out entries below are simply those no longer in use, they should all work.
--
constant
-- add_eax_ecx = {#01,#C8}, -- 0o001 0o310 -- add eax,ecx
-- add_eax_mem32 = {#03,#05}, -- 0o003 0o005 mem32 -- add eax,[mem32]
-- add_edx_eax = {#03,#D0}, -- 0o003 0o320 -- add edx,eax
-- add_edx_ecx = {#03,#D1}, -- 0o003 0o321 -- add edx,ecx
-- add_eax_imm32 = #05, -- 0o005 imm32 -- add eax,imm32
-- or_edx_edx = {#09,#D2}, -- 0o011 0o322 -- or edx,edx
-- or_eax_mem32 = {#0B,#05}, -- 0o013 0o005 mem32 -- or eax,[mem32]
-- or_eax_imm32 = #0D, -- 0o015 imm32 -- or eax,imm32
-- flags
jo_rel32 = {#0F,#80}, -- 0o017 0o200 imm32 -- jo rel32 O = 1
jno_rel32 = {#0F,#81}, -- 0o017 0o201 imm32 -- jno rel32 O = 0
-- jc_rel32 = {#0F,#82}, -- 0o017 0o202 imm32 -- jc rel32 -- == jb, == jnae C = 1
-- jae_rel32 = {#0F,#83}, -- 0o017 0o203 imm32 -- jae rel32 -- == jnc, == jnb C = 0
jnc_rel32 = {#0F,#83}, -- 0o017 0o203 imm32 -- jnc rel32
je_rel32 = {#0F,#84}, -- 0o017 0o204 imm32 -- je rel32 -- == jz Z = 1
jz_rel32 = {#0F,#84}, -- 0o017 0o204 imm32 -- jz rel32
jne_rel32 = {#0F,#85}, -- 0o017 0o205 imm32 -- jne rel32 -- == jnz Z = 0
jnz_rel32 = {#0F,#85}, -- 0o017 0o205 imm32 -- jnz rel32
-- jbe_rel32 = {#0F,#86}, -- 0o017 0o206 imm32 -- jbe rel32 -- == jna C = 1 or Z = 1
-- ja_rel32 = {#0F,#87}, -- 0o017 0o207 imm32 -- ja rel32 -- == jnbe C = 0 and Z = 0
-- js_rel32 = {#0F,#88}, -- 0o017 0o210 imm32 -- js rel32 S = 1
jns_rel32 = {#0F,#89}, -- 0o017 0o211 imm32 -- jns rel32 S = 0
-- jp_rel32 = {#0F,#8A}, -- 0o017 0o212 imm32 -- jp rel32 -- == jpe P = 1
-- jnp_rel32 = {#0F,#8B}, -- 0o017 0o213 imm32 -- jnp rel32 -- == jpo P = 0
jl_rel32 = {#0F,#8C}, -- 0o017 0o214 imm32 -- jl rel32 -- == jnge S != O
jge_rel32 = {#0F,#8D}, -- 0o017 0o215 imm32 -- jge rel32 -- == jnl S = O
jle_rel32 = {#0F,#8E}, -- 0o017 0o216 imm32 -- jle rel32 -- == jng Z = 1 or (S!=O)
jg_rel32 = {#0F,#8F}, -- 0o017 0o217 imm32 -- jg rel32 -- == jnle Z = 0 and (S=O)
sete = {#0F,#94}, -- 0o017 0o224 xrm -- setz r/m8
setne = {#0F,#95}, -- 0o017 0o225 xrm -- setne r/m8
setl = {#0F,#9C}, -- 0o017 0o234 xrm -- setl r/m8
-- setge = {#0F,#9D}, -- 0o017 0o235 xrm -- setge r/m8
-- setle = {#0F,#9E}, -- 0o017 0o236 xrm -- setle r/m8
-- setg = {#0F,#9F}, -- 0o017 0o237 xrm -- setg r/m8
-- and_eax_mem32 = {#23,#05}, -- 0o043 0o005 mem32 -- and eax,[mem32]
and_eax_imm32 = #25, -- 0o045 imm32 -- and eax,imm32
-- sub_eax_mem32 = {#2B,#05}, -- 0o053 0o005 mem32 -- sub eax,[mem32]
-- sub_eax_imm32 = #2D, -- 0o055 imm32 -- sub eax,imm32
xor_eax_eax = {#31,#C0}, -- 0o061 0o300 -- xor eax,eax (== {#33,#C0})
xor_ebx_ebx = {#31,#DB}, -- 0o061 0o333 -- xor ebx,ebx
xor_ecx_ecx = {#31,#C9}, -- 0o061 0o311 -- xor ecx,ecx
xor_edx_edx = {#31,#D2}, -- 0o061 0o322 -- xor edx,edx
-- xor_esi_esi = {#31,#F6}, -- 0o061 0o366 -- xor esi,esi
xor_edi_edi = {#31,#FF}, -- 0o061 0o377 -- xor edi,edi
-- xor_eax_mem32 = {#33,#05}, -- 0o063 0o005 mem32 -- xor eax,[mem32]
-- xor_eax_imm32 = #35, -- 0o065 imm32 -- xor eax,imm32
-- cmp_reg_regr = #39, -- 0o071 0o3rt -- cmp tst,reg
cmp_reg_reg = #3B, -- 0o073 0o3rt -- cmp reg,tst
cmp_ecx_edx = {#39,#D1}, -- 0o071 0o321 -- cmp ecx,edx
-- cmp_eax_mem32 = {#3B,#05}, -- 0o073 0o005 mem32 -- cmp eax,[mem32]
-- cmp_edx_mem32 = {#3B,#15}, -- 0o073 0o025 mem32 -- cmp edx,[mem32]
-- cmp_edx_sib = {#3B,#54}, -- 0o073 0o124 sib d8 -- cmp edx,[b32+i32*s+d8]
-- cmp_edi_sib = {#3B,#7C}, -- 0o073 0o174 sib d8 -- cmp edi,[b32+i32*s+d8]
-- cmp_eax_esi = {#3B,#C6}, -- 0o073 0o306 -- cmp eax,esi
cmp_eax_imm32 = #3D, -- 0o075 imm32 -- cmp eax,imm32
--DEV invalid in 64-bit mode:
inc_eax = #40, -- 0o100 -- inc eax
-- inc_ecx = #41, -- 0o101 -- inc ecx
-- inc_edx = #42, -- 0o102 -- inc edx
-- dec_eax = #48, -- 0o110 -- dec eax
-- dec_edx = #4A, -- 0o112 -- dec edx
-- dec_edi = #4F, -- 0o117 -- dec edi
push_eax = #50, -- 0o120 -- push eax ..#57 for other regs
push_edx = #52, -- 0o122 -- push edx
push_esi = #56, -- 0o126 -- push esi
pop_eax = #58, -- 0o130 -- pop eax ..#5F for other regs
pop_esi = #5E, -- 0o136 -- pop esi
-- pop_edi = #5F, -- 0o137 -- pop edi
-- pushad = #60, -- 0o140 -- pushad
-- popad = #61, -- 0o141 -- popad
-- wd_prfx = #66, -- 0o146 -- <word prefix>
push_imm32 = #68, -- 0o150 imm32 -- push imm32
push_imm8 = #6A, -- 0o152 imm8 -- push imm8 (as machine word size)
--
-- WARNING: While it may seem daft to emit a 32-bit jump that you /know/ will fit in
-- an 8-bit offset, I have regretted doing so virtually every time.
--
-- All 32-bit jumps are automatically converted to 8-bit form when possible.
-- This involves examining each offset, adjusting it as needed if/when any
-- surrounding/jumped over instructions get shortened, checking whether it
-- fits in a byte and if so patching it into short form.
-- Because of that complexity, it is actually cheaper to have one method of
-- adjusting offsets, rather than one for dwords and one for bytes. Hence
-- there is no "byte offset adjustment", if you use short jumps that is it,
-- the byte offset specified will not be altered even if it ought to be.
--
-- Technically, completely fixed offsets, that do not jump over any 32-bit
-- jumps or isDead blocks and would not benefit from branch straightening,
-- could be output in short form, and that would indeed shave a fraction
-- off compilation time (never actually measured). In a bit of a hissy fit
-- I commented out all short jumps and inserted 32-bit replacements. Just
-- be warned that over-zealous use of short jumps can really bite. The one
-- that caught me out big-time/I totally missed was branch straightening.
-- As long as there is equivalent 32-bit code next to them in case of
-- future emergencies, if you can prove any short jump use actually saves
-- a measurable time, I do not mind them going back in. I have marked up
-- most jumps with [sj OK] or [sj NOT ok] on the understanding there will
-- be extensive testing on your part and no blame on mine.
--
-- Use the -list option to prove things are ending up the way you'd like,
-- whether that be a 32-bit jump you know fits in a byte or an 8-bit jump
-- you are so keen to emit does not (sometimes) land mid-instruction or
-- miss out on a branch straightening opportunity (land on a jmp), etc.
--
-- jo_rel8 = #70, -- 0o160 imm8 -- jo rel8
-- jno_rel8 = #71, -- 0o161 imm8 -- jno rel8
-- jc_rel8 = #72, -- 0o162 imm8 -- jc rel8
-- jnc_rel8 = #73, -- 0o163 imm8 -- jnc rel8
-- jae_rel8 = #73, -- 0o163 imm8 -- jae rel8 -- == jnc, == jnb
-- je_rel8 = #74, -- 0o164 imm8 -- je rel8
-- jz_rel8 = #74, -- 0o164 imm8 -- jz rel8
-- jne_rel8 = #75, -- 0o165 imm8 -- jne rel8
-- jnz_rel8 = #75, -- 0o165 imm8 -- jnz rel8
-- jbe_rel8 = #76, -- 0o166 imm8 -- jbe rel8
-- ja_rel8 = #77, -- 0o167 imm8 -- ja rel8
-- js_rel8 = #78, -- 0o170 imm8 -- js rel8
-- jns_rel8 = #79, -- 0o171 imm8 -- jns rel8
-- jl_rel8 = #7C, -- 0o174 imm8 -- jl rel8
-- jge_rel8 = #7D, -- 0o175 imm8 -- jge rel8
-- jle_rel8 = #7E, -- 0o176 imm8 -- jle rel8
-- jg_rel8 = #7F, -- 0o177 imm8 -- jg rel8
cmpb_sibd8i8 = {#80,#7C}, -- 0o200 0o174 sib d8 i8 -- cmp byte[b32+i32*s+d8],i8
-- cmp_mem32_i32 = {#81,#3D}, -- 0o201 0o075 m32 i32 -- cmp [m32],imm32
-- cmpd_sibd8i32 = {#81,#7C}, -- 0o201 0o174 sib d8 i32 -- cmp dword[b32+i32*s+d8],i32
cmp_reg_imm32 = #81, -- 0o201 0o37r imm32 -- cmp reg,imm32 (not eax)
-- cmp_ecx_imm32 = {#81,#F9}, -- 0o201 0o371 imm32 -- cmp ecx,imm32
-- cmp_edx_imm32 = {#81,#FA}, -- 0o201 0o372 imm32 -- cmp edx,imm32
-- cmp_esi_imm32 = {#81,#FE}, -- 0o201 0o376 imm32 -- cmp esi,imm32
-- cmp_edi_imm32 = {#81,#FF}, -- 0o201 0o377 imm32 -- cmp edi,imm32
-- cmpd_mem32_i8 = {#83,#3D}, -- 0o203 0o075 m32 imm8 -- cmp dword[m32],imm8
-- NOTE(review): "addd_subd8i8" is presumably a typo for "addd_sibd8i8" (compare the
--  next two entries); the identifier is left as-is since renaming needs every use updated.
addd_subd8i8 = {#83,#44}, -- 0o203 0o104 sib d8 i8 -- add dword[b32+i32*s+d8],i8
subd_sibd8i8 = {#83,#6C}, -- 0o203 0o154 sib d8 i8 -- sub dword[b32+i32*s+d8],i8
cmpd_sibd8i8 = {#83,#7C}, -- 0o203 0o174 sib d8 i8 -- cmp dword[b32+i32*s+d8],i8
-- add_eax_imm8 = {#83,#C0}, -- 0o203 0o300 imm8 -- add eax,imm8
add_esp_imm8 = {#83,#C4}, -- 0o203 0o304 imm8 -- add esp,imm8
-- or_eax_imm8 = {#83,#C8}, -- 0o203 0o310 imm8 -- or eax,imm8
-- and_eax_imm8 = {#83,#E0}, -- 0o203 0o340 imm8 -- and eax,imm8
-- xor_eax_imm8 = {#83,#F0}, -- 0o203 0o360 imm8 -- xor eax,imm8
-- sub_eax_imm8 = {#83,#E8}, -- 0o203 0o350 imm8 -- sub eax,imm8
sub_esp_imm8 = {#83,#EC}, -- 0o203 0o354 imm8 -- sub esp,imm8
-- cmp_eax_imm8 = {#83,#F8}, -- 0o203 0o370 imm8 -- cmp eax,imm8
-- cmp_edx_imm8 = {#83,#FA}, -- 0o203 0o372 imm8 -- cmp edx,imm8
test_reg_reg = #85, -- 0o205 0o3rr -- test reg,reg
-- test_eax_eax = {#85,#C0}, -- 0o205 0o300 -- test eax,eax
test_ecx_ecx = {#85,#C9}, -- 0o205 0o311 -- test ecx,ecx
test_edx_edx = {#85,#D2}, -- 0o205 0o322 -- test edx,edx
xchg = #87, -- 0o207 0o3rr -- xchg reg,reg
-- xchg_esi_edi = {#87,#FE}, -- 0o207 0o376 -- xchg esi,edi
-- mov_sib_al = {#88,#04}, -- 0o210 0o004 sib -- mov [b32+i32*s],al
-- mov_medx_eax = {#89,#02}, -- 0o211 0o002 -- mov [edx],eax
-- mov_sib_eax = {#89,#04}, -- 0o211 0o004 sib -- mov [b32+i32*s],eax
-- mov_mesi_eax = {#89,#06}, -- 0o211 0o006 -- mov [esi],eax
mov_medi_eax = {#89,#07}, -- 0o211 0o007 -- mov [edi],eax
-- mov_mem32_ecx = {#89,#0D}, -- 0o211 0o015 m32 -- mov [m32],ecx
-- mov_mesi_edx = {#89,#16}, -- 0o211 0o026 -- mov [esi],edx
-- mov_mem32_edx = {#89,#15}, -- 0o211 0o025 m32 -- mov [m32],edx
mov_mem32_ebx = {#89,#1D}, -- 0o211 0o035 m32 -- mov [m32],ebx
mov_mem32_esi = {#89,#35}, -- 0o211 0o065 m32 -- mov [m32],esi (or r14 with #4C)
mov_ebpi8_eax = {#89,#45}, -- 0o211 0o105 imm8 -- mov [ebp+imm8],eax
-- mov_ebpi8_edx = {#89,#55}, -- 0o211 0o125 imm8 -- mov [ebp+imm8],edx
mov_ebpi8_ebx = {#89,#5D}, -- 0o211 0o135 imm8 -- mov [ebp+imm8],ebx(0)
mov_rbpi8_r14 = {#89,#75}, -- 0o211 0o165 imm8 -- mov [rbp+imm8],r14 (needs a #4C)
mov_ebpi32_ebx = {#89,#9D}, -- 0o211 0o235 imm32 -- mov [ebp+imm32],ebx(0)
mov_rbpi32_r14 = {#89,#B5}, -- 0o211 0o265 imm32 -- mov [rbp+imm32],r14 (needs a #4C)
-- mov_ecx_eax = {#89,#C1}, -- 0o211 0o301 -- mov ecx,eax -- see 8B version below
-- mov_edx_eax = {#89,#C2}, -- 0o211 0o302 -- mov edx,eax -- ""
-- mov_esi_eax = {#89,#C6}, -- 0o211 0o306 -- mov esi,eax -- ""
-- mov_ecx_edx = {#89,#D1}, -- 0o211 0o321 -- mov ecx,edx -- ""
mov_reg = #89, -- 0o211 0o3sd -- mov dst,src
mov_byte = #8A, -- 0o212 xrm (sib etc) -- mov r8, various
-- mov_al_sib = {#8A,#04}, -- 0o212 0o004 sib -- mov al,[b32+i32]
-- mov_dl_sib = {#8A,#14}, -- 0o212 0o024 sib -- mov dl,[b32+i32]
-- mov_dl_sibd8 = {#8A,#54}, -- 0o212 0o124 sib d8 -- mov dl,[b32+i32*s+d8]
-- mov_al_esid8 = {#8A,#46}, -- 0o212 0o106 d8 -- mov al,[esi+d8]
-- mov_al_esid32 = {#8A,#86}, -- 0o212 0o206 d32 -- mov al,[esi+d32]
-- mov_dl_sibd32 = {#8A,#94}, -- 0o212 0o224 sib d32 -- mov dl,[b32+i32*s+d32]
mov_dword = #8B, -- 0o213 xrm (sib etc) -- mov r32, various
-- mov_reg2 = #8B, -- 0o213 0o3ds -- mov dst,src
-- mov_eax_sib = {#8B,#04}, -- 0o213 0o004 sib -- mov eax,[b32+i32*s]
-- mov_eax_medi = {#8B,#07}, -- 0o213 0o007 -- mov eax,[edi]
-- mov_ecx_mem32 = {#8B,#0D}, -- 0o213 0o015 m32 -- mov ecx,[m32]
-- mov_ecx_mesi = {#8B,#0E}, -- 0o213 0o016 -- mov ecx,[esi]
-- mov_edx_sib = {#8B,#14}, -- 0o213 0o024 sib -- mov edx,[b32+i32*s]
-- mov_edx_mem32 = {#8B,#15}, -- 0o213 0o025 m32 -- mov edx,[m32]
-- mov_edx_mesi = {#8B,#16}, -- 0o213 0o026 -- mov edx,[esi]
-- mov_edx_medi = {#8B,#17}, -- 0o213 0o027 -- mov edx,[edi]
-- mov_ebx_mem32 = {#8B,#1D}, -- 0o213 0o035 m32 -- mov ebx,[m32]
-- mov_esi_mem32 = {#8B,#35}, -- 0o213 0o065 m32 -- mov esi,[m32]
-- mov_edi_medx = {#8B,#3A}, -- 0o213 0o072 -- mov edi,[edx]
-- mov_edi_mem32 = {#8B,#3D}, -- 0o213 0o075 m32 -- mov edi,[m32]
mov_eax_ebpd8 = {#8B,#45}, -- 0o213 0o105 d8 -- mov eax,[ebp+d8]
-- mov_eax_esid8 = {#8B,#46}, -- 0o213 0o106 d8 -- mov eax,[esi+d8]
-- mov_eax_edid8 = {#8B,#47}, -- 0o213 0o107 d8 -- mov eax,[edi+d8]
-- mov_edx_ebpd8 = {#8B,#55}, -- 0o213 0o125 d8 -- mov edx,[ebp+d8]
mov_edi_ebpd8 = {#8B,#7D}, -- 0o213 0o175 d8 -- mov edi,[ebp+d8]
-- mov_ecx_sibd8 = {#8B,#4C}, -- 0o213 0o114 sib d8 -- mov ecx,[b32+i32*s+d8]
-- mov_esi_sibd8 = {#8B,#74}, -- 0o213 0o164 sib d8 -- mov esi,[b32+i32*s+d8]
-- mov_eax_esid32 = {#8B,#86}, -- 0o213 0o206 d32 -- mov eax,[esi+d32]
-- mov_eax_edid32 = {#8B,#87}, -- 0o213 0o207 d32 -- mov eax,[edi+d32]
mov_eax_edx = {#8B,#C2}, -- 0o213 0o302 -- mov eax,edx
mov_eax_ebx = {#8B,#C3}, -- 0o213 0o303 -- mov eax,ebx
-- mov_ecx_eax = {#8B,#C8}, -- 0o213 0o310 -- mov ecx,eax
mov_ecx_edx = {#8B,#CA}, -- 0o213 0o312 -- mov ecx,edx
mov_ecx_esp = {#8B,#CC}, -- 0o213 0o314 -- mov ecx,esp
mov_edx_eax = {#8B,#D0}, -- 0o213 0o320 -- mov edx,eax
-- mov_edx_ebp = {#8B,#D5}, -- 0o213 0o325 -- mov edx,ebp
mov_esp_ecx = {#8B,#E1}, -- 0o213 0o341 -- mov esp,ecx
mov_esi_eax = {#8B,#F0}, -- 0o213 0o360 -- mov esi,eax
-- mov_edi_ebp = {#8B,#FD}, -- 0o213 0o375 -- mov edi,ebp
-- lea_eax_ecdx = {#8D,#04,#0A},-- 0o215 0o004 0o012 -- lea eax,[ecx+edx]
-- lea_esi_ecdx = {#8D,#34,#0A},-- 0o215 0o064 0o012 -- lea esi,[ecx+edx]
lea = #8D, -- 0o215 xrm [d8/32]
pop_mem32 = {#8F,#05}, -- 0o217 0o005 mem32 -- pop dword[mem32]
pop_ebpi8 = {#8F,#45}, -- 0o217 0o105 imm8 -- pop dword[ebp+imm8]
pop_ebpi32 = {#8F,#85}, -- 0o217 0o205 imm32 -- pop dword[ebp+imm32]
nop = #90, -- 0o220 -- nop
cdq = #99, -- 0o231 -- cdq (eax-> edx:eax)
mov_eax_mem32 = #A1, -- 0o241 m32 -- mov eax,[m32]
mov_mem32_eax = #A3, -- 0o243 m32 -- mov [m32],eax
mov_al_imm8 = #B0, -- 0o260 imm8 -- mov al,imm8
mov_eax_imm32 = #B8, -- 0o270 imm32 -- mov eax,imm32
-- mov_ecx_imm32 = #B9, -- 0o271 imm32 -- mov ecx,imm32
mov_edx_imm32 = #BA, -- 0o272 imm32 -- mov edx,imm32
-- mov_ebx_imm32 = #BB, -- 0o273 imm32 -- mov ebx,imm32
-- mov_esi_imm32 = #BE, -- 0o276 imm32 -- mov esi,imm32
mov_edi_imm32 = #BF, -- 0o277 imm32 -- mov edi,imm32
shl_ecx_imm8 = {#C1,#E1}, -- 0o301 0o341 imm8 -- shl ecx,imm8
-- shl_esi_imm8 = {#C1,#E6}, -- 0o301 0o346 imm8 -- shl esi,imm8
-- shr_eax_imm8 = {#C1,#E8}, -- 0o301 0o350 imm8 -- shr eax,imm8
shr_ecx_imm8 = {#C1,#E9}, -- 0o301 0o351 imm8 -- shr ecx,imm8
-- sar_eax_imm8 = {#C1,#F8}, -- 0o301 0o370 imm8 -- sar eax,imm8
sar_edx_imm8 = {#C1,#FA}, -- 0o301 0o372 imm8 -- sar edx,imm8
mov_m32_imm32 = {#C7,#05}, -- 0o307 0o005 m32 i32 -- mov [m32],imm32
mov_medi_im32 = {#C7,#07}, -- 0o307 0o007 imm32 -- mov [edi],imm32
-- mov_m32sib8i32 = {#C7,#44}, -- 0o307 0o104 sib d8 imm32 -- mov [b32+i32*s+d8],imm32
-- mov_regd8_i32 = #C7, -- 0o307 0o10r d8 imm32 -- mov [reg+d8],imm32
mov_ebpd8_i32 = {#C7,#45}, -- 0o307 0o105 d8 imm32 -- mov [ebp+d8],imm32
mov_edid8_i32 = {#C7,#47}, -- 0o307 0o107 d8 imm32 -- mov [edi+d8],imm32
-- mov_regd32_i32 = #C7, -- 0o307 0o20r d32 imm32 -- mov [reg+d32],imm32
mov_ebpd32_i32 = {#C7,#85}, -- 0o307 0o205 d32 imm32 -- mov [ebp+d32],imm32
mov_edid32_i32 = {#C7,#87}, -- 0o307 0o207 d32 imm32 -- mov [edi+d32],imm32
mov_regimm32 = #C7, -- 0o307 0o30r imm32 -- mov reg,imm32
-- shl_eax_1 = {#D1,#E0}, -- 0o321 0o340 -- shl eax,1
-- shl_ecx_1 = {#D1,#E1}, -- 0o321 0o341 -- shl ecx,1
shl_edx_1 = {#D1,#E2}, -- 0o321 0o342 -- shl edx,1
shl_esi_1 = {#D1,#E6}, -- 0o321 0o346 -- shl esi,1
-- shr_eax_1 = {#D1,#E8}, -- 0o321 0o350 -- shr eax,1
sar_eax_1 = {#D1,#F8}, -- 0o321 0o370 -- sar eax,1
shl_eax_cl = {#D3,#E0}, -- 0o323 0o340 -- shl eax,cl
-- fild_mem32 = {#DB,#05}, -- 0o333 0o005 mem32 -- fild dword[mem32]
-- fistp_d_esp = {#DB,#1C,#24},-- 0o333 0o034 0o044 -- fistp dword[esp]
-- fistp_q_esp = {#DF,#3C,#24},-- 0o337 0o074 0o044 -- fistp qword[esp]
call_rel32 = #E8, -- 0o350 imm32 -- call rel32
jump_rel32 = #E9, -- 0o351 imm32 -- jmp rel32
-- jump_rel8 = #EB, -- 0o353 imm8 -- jmp rel8
-- rep_movsb = {#F3,#A4}, -- 0o363 0o244 -- rep movsb
-- rep_movsd = {#F3,#A5}, -- 0o363 0o245 -- rep movsd
-- rep_stosd = {#F3,#AB}, -- 0o363 0o253 -- rep stosd
tstb_sibd8i8 = {#F6,#44}, -- 0o366 0o104 sib d8 i8 -- test byte[b32+i32*s+d8],i8
-- neg_edx = {#F7,#DA}, -- 0o367 0o332 -- neg edx
imul_ecx = {#F7,#E9}, -- 0o367 0o351 -- imul ecx
idiv_ecx = {#F7,#F9}, -- 0o367 0o371 -- idiv ecx
inc_mem32 = {#FF,#05}, -- 0o377 0o005 mem32 -- inc dword[mem32]
-- dec_mem32 = {#FF,#0D}, -- 0o377 0o015 mem32 -- dec dword[mem32]
--nb mov reg,[ecx+eax*4-8]; inc reg; mov [ecx+eax*4-8],reg offers better opportunities for peephole opts....
-- jmp_mem32 = {#FF,#15}, -- 0o377 0o025 mem32 -- jmp dword[mem32]
--  (NB: #FF,#15 [0o025] actually encodes call dword[mem32]; jmp dword[mem32] is #FF,#25 [0o045].)
jmp_si5_imm32 = {#FF,#24}, -- 0o377 0o044 si5 imm32 -- jmp dword[i*s+imm32]
push_mem32 = {#FF,#35}, -- 0o377 0o065 mem32 -- push dword[mem32]
--DEV to go under newEmit (*2):
incd_sib = {#FF,#44}, -- 0o377 0o104 sib d8 -- inc dword[b32+i32*s+d8]
decd_sib = {#FF,#4C}, -- 0o377 0o114 sib d8 -- dec dword[b32+i32*s+d8]
push_ebpi8 = {#FF,#75}, -- 0o377 0o165 i8 -- push dword[ebp-nn]
push_ebpi32 = {#FF,#B5} -- 0o377 0o265 i32 -- push dword[ebp-nnnn]
-- nb: the above tables contains some xrm bytes, for readability, whereas for the
-- most part they are constructed by hand and commented, eg #C0+reg -- 0o30r.
constant -- sib bytes (nb the naming convention is base_idx[scale])
-- A sib byte reads as three octal digits 0oSIB: S = scale (0,1,2,3 for *1,*2,*4,*8),
--  I = index register, B = base register (x86 register numbers, eg eax=0, ebx=3).
--  Hence 0o203 is scale *4, index eax, base ebx, ie [ebx+eax*4].
-- esi_edx = #32, -- 0o062 (==0o026|#16) -- [esi+edx]
-- esi_edi = #3E, -- 0o076 (==0o067|#37) -- [esi+edi]
-- ecx_eax4 = #81, -- 0o201 -- [ecx+eax*4]
ebx_eax4 = #83, -- 0o203 -- [ebx+eax*4]
-- eax_ecx4 = #88, -- 0o210 -- [eax+ecx*4]
-- ebx_ecx4 = #8B, -- 0o213 -- [ebx+ecx*4]
-- ecx_edx4 = #91, -- 0o221 -- [ecx+edx*4]
ebx_edx4 = #93 -- 0o223 -- [ebx+edx*4]
-- ebp_edx4 = #95 -- 0o225 -- [ebp+edx*4]
-- esi_edx4 = #96 -- 0o226 -- [esi+edx*4]
-- ebx_esi4 = #B3, -- 0o263 -- [ebx+esi*4]
-- esi_edi4 = #BE -- 0o276 -- [esi+edi*4]
--
-- generate a most recently used state table.
-- for eax(1), ecx(2), esi(3), edi(4) we want to know which was used last,
-- and when a reg is used promote it to top and lower others as needed.
--
-- Should it appear to be misbehaving, the code from constant eax=0 to
-- procedure merge can easily be copied into a test program, simply
-- comment out any emitHexXx calls.
--
-- x86 register numbers (the 3-bit values used in xrm and sib bytes, eg #C0+reg).
--  esp(4) and ebp(5) are commented out, ie not currently defined here.
constant eax=0, ecx=1, edx=2, ebx=3,
-- esp=4,
-- ebp=5,
esi=6, edi=7
--if ebx or esp or ebp then end if -- suppress warnings
--
-- Example of isOpCode:
-- emitHex5s({jump_rel32,isOpCode,0,0,opRetf})
-- --> #E9,<byteified VMep[opRetf]-addr_next_instruction>
-- The translation from isOpCode,0,0,VMidx to an offset cannot be performed
-- until we know where the actual code address is, ie after we have decided
-- which routines are required, shortened jumps, removed isDead blocks, etc.
--
--DEV work in progress:
-- Example of isApiFn: (old style, unfinished)
-- #ilasm{call_mem32,%isApiFn,0,0,%opInitialiseCriticalSection}
-- --> #FF,#15,<byteified VMep[opInitialiseCriticalSection]>
-- == call dword[<somewhere-in-the-RVA>]
-- Example of isAPIfn: (new style, just started)
-- #ilASM{call "kernel32","FillConsoleOutputCharacterA"}
-- #ilasm{call_mem32,%isAPIfn,"kernel32.dll","HeapAlloc"}
-- --> #FF,#15,<byteified import section thunk>
-- == call dword[import section entry]
-- Note that only a few selected windows api calls are currently available, [DEV watch this space]
-- run p -imports or demo\arwendemo\filedump.exw for the full list. Primarily
-- for threads, where the initial setup could not be done via pcfunc.e etc
-- because they would act on the wrong stack (if called before the new stack
-- was properly set up). Also used for fast file I/O [DEV...]
-- I really only ever intend to make a handful available [why?]
-- that help migrate code out of the backend into #ilasm, or prevent more code
-- being added to the backend, though I may be persuaded into other additions.
--
-- One possible alternative might be to create a whole new import section: use [ IN PROGRESS ]
-- (eg) demo/filedump, then in pemit.e make sure you get all those entries plus
-- any others you need, written somewhere else in the file and patching a few
-- RVA and Section values. Point SetExceptionHandler (since that is the first
-- thing to get called) in the original import table to a new block of code
-- which sets the original import table from the new (rather than hunt down
-- and fixup existing references in the VM code) and finally completes the
-- SetExceptionHandler call. Good luck with that ;-)
--
-- Example of isVar:
-- emitHex5v(mov_eax_imm32,p2) -- mov eax,p2 (address of p2)
-- --> #B8,isVar,0,0,idx
-- --> #B8,<byteified DSvaddr+idx*4-4>
--DEV newEBP(?)
-- Note that while tvars get their final idx pretty much at creation time,
-- gvars and consts don't get theirs until very late on, start of finalfixups
-- in fact.
--
--DEV isConstRef no longer valid under newEmit
-- isConstRef and isConstRefCount are used in eg name="fred". Since the compiler
-- created that "fred" and knows exactly where it put it, it is quicker to use a
-- literal ref/address of refcount, rather than load/compute them at run-time.
-- isILa is currently only used in user defined type checking.
-- isIL is currently only used in opFrame/set params/Jmp(#E9),isIL,0,0,routine_no.
-- Obtains the absolute or relative address of the entry point for a routine.
-- They were both derived from the isAddr concept, although that has since been
-- extended with branch straightening/merged a bit more with isJmp.
--
-- Explanation of isAddr:
-- The calling convention for opcodes such as opMkSq (make sequence) is:
--
-- mov edx,N
-- push L1
-- push items N..1
-- jmp opMkSq
-- L1:
--
-- so that it can pop N items off the stack and return, rather than have to save
-- the return address somewhere before it can start popping. (For clarity, there
-- are two other pushes, result addr and previous value, missing from the above.)
-- isAddr implements the above as a relative offset which is translated to an
-- actual code address, you guessed it, once we know where that is.
--
-- Explanation of isBase:
-- Suppose you have:
-- switch i
-- case 4004
-- ...
-- case 4017
-- end switch
-- Then the jump table can/should contain 14 addresses. The same would be true if
-- the cases ranged from -4 to -17. Rather than add or subtract something from the
-- index before using it on the jump table, we supply a dummy table start which
-- is calculated at compile-time, in the above example that would be 16012 bytes
-- before the actual table. (Of course we may have to do a range check before we
-- index the jump table, but that changes nothing.) isBase operates in a similar
-- manner to isAddr, but without any adjustments for shortened jumps.
--
-- Explanation of isJmp/isShortJmp.
-- To keep things simple (!) the main routine pretty much always emits dword
-- jumps, it is scanforShortJmp() which decides whether the offset will fit
-- in a byte and blurph() which recodes those that do. The scanforShortJmp
-- is a decidedly non-trivial enterprise since shortening one instruction
-- from 5 or 6 bytes to 2 means that all the offsets of any jumps over it
-- must also (ultimately) be adjusted. However, this somewhat frightening
-- complexity is nicely hidden away inside the two mentioned routines and
-- the main parser can and should emit the longer forms, confident that
-- they will be properly dealt with. If you like, you may use "p -d test"
-- (where you supply the test) to prove this to your own satisfaction.
--
-- Explanation of isDead:
-- (No longer in use, but should still work if you need it.)
-- Suppose at "end function" you realise the result is always an integer,
-- hence there is no need to deallocate the result var. Way back, when
-- pmain.e used to emit binary during parsing, it would often patch the
-- dealloc code. For example #20005 means "five dead bytes at this point".
-- scanforShortJmp() is responsible for adjusting the offsets of jumps
-- over isDead blocks: in phase 1 it does the backward jump offsets (only),
-- and leaves forward jumps to phase 2, by when we can use the fwd chain
-- (and by when isDead removal has cleaned out the backward chain).
-- nb #20000,#20001,#20002 are illegal since removal uses two slots:
-- if necessary (ie to remove 1 or 2 bytes) you would need to plant a
-- "dummy" #20003,0,0 which can be patched to #20004/5. Obviously, a
-- #20000 is nonsense since it would clobber the first byte to keep.
-- Planting an isDead is of course far easier than doing a slice
-- on x86 in pmain.e and adjusting all the affected jump offsets
-- and backpatch links by hand there & then.
--
-- x86 is the buffer that the emitHexNnn routines below append to, one
-- element per emitted byte (or flag/link/value slot, see the notes on
-- dword operands that follow).
global sequence x86 -- also used by schend.e
--!/**/ #isginfo{x86,0b0100,MIN,MAX,integer,-2} -- Dsq of integer (unknown length)
global integer schidx -- see schend.e
schidx = 0
--
-- dword operands are stored (see eg emitHex5) as follows:
-- flag byte (lsb), [as above, eg isVar]
-- 0,
-- 0,
-- value/offset/index/backpatch link (msb)
--
-- Note that flag bits may be set on the msb, so always scan forward and skip over it.
--
-- The two zeroes in the middle are used by this program for (temporary) linked lists,
-- so again some of the flag bits may therefore end up being set on bytes 2 and 3.
--
-- reginfo is reset to 0 by lineinfo() after it emits a call (all regs trashed).
-- pfileno is the value lineinfo() loads into ecx ahead of an opLnp/opLnpt call.
integer reginfo = 0,-- "in use" part of mloc/mreg (see below).
-- (makes emptying the table nice and quick)
pfileno = 0
--with trace --DEV (otherwise we get a memory leak!)
--constant m4 = allocate(4),
-- m44 = {m4,4}
---- ,
---- m42 = {m4,2}
procedure emitHexDword(atom v)
-- Append the four bytes of the dword v to x86, in the order that poke4()
-- lays them out in memory.
-- v must lie in -#80000000..#FFFFFFFF; anything else is reported (along
-- with emitline and pfileno) and then deliberately crashes via ?9/0.
    if v<-#80000000 or v>#FFFFFFFF then
        ?{"emitHexDword",v,sprintf("%08x",v),"emitline",emitline,"pfileno",pfileno}
        ?9/0
    end if
    -- let poke4/peek do the splitting (faster than doing divides etc;
    --  idea from database.e)
    atom pMem = allocate(4)
    poke4(pMem, v)
    string bytes = peek({pMem,4})
    free(pMem)
    --DEV tryme: x86 &= bytes (ditto emitHexQuadword)
    for idx=1 to 4 do
        x86 = append(x86,bytes[idx])
    end for
end procedure
procedure emitHexQuadword(atom v)
-- Append the eight bytes of the quadword v to x86, in the order that
-- poke8() lays them out in memory.
--DEV we should really check this is only invoked for 64-bit.
--    (the range check in emitHexDword stands for 32-bit, but the solution
--     would not be to call this, instead emit a proper float-ref.)
    -- let poke8/peek do the splitting (faster than doing divides etc;
    --  idea from database.e)
    atom pMem = allocate(8)
    poke8(pMem, v)
    string bytes = peek({pMem,8})
    free(pMem)
    for idx=1 to 8 do
        x86 = append(x86,bytes[idx])
    end for
end procedure
procedure emitHexWord(atom v)
-- Append the two bytes of the word v to x86, in the order that poke2()
-- lays them out in memory.
    -- let poke2/peek do the splitting (faster than doing divides etc;
    --  idea from database.e)
    atom pMem = allocate(2)
    poke2(pMem, v)
    string bytes = peek({pMem,2})
    free(pMem)
    for idx=1 to 2 do
        x86 = append(x86,bytes[idx])
    end for
end procedure
-- for linking up isBase/isAddr/isJmp(/isDead):
-- Each of these is an index into x86 of a quad's flag element, or 0 when
-- the corresponding list is empty (see q86insert/quad/quad2 below).
integer q86first,
q86last,
-- ... and for all isOther: (q86>1)
q86f2,
q86l2
--DEV: the following could probably be improved on, since q86first/last
-- have been built in order, in the main part of ilxlate(), and the
-- opAsm chain(s) being processed at the end of ilxlate() are also in
-- order, a "merge" rather than this "one at a time, always starting
-- from q86first" theoretically ought to be faster. However, that said
-- most apps won't have a significant number of opAsm to deal with, &
-- also note opLn breakage means that the "chains" on each opAsm will
-- actually only be one entry, so where we currently call this is not
-- appropriate, instead we'd want to reprocess the entire opAsm chain,
-- but still cope with >1 entry per opAsm block.
--with trace
procedure q86insert(integer k)
-- Splice the quad whose flag element is x86[k] into the doubly-linked list
-- bounded by q86first/q86last, keeping the list ordered by position in x86.
--  x86[k+1] is the "next" link and x86[k+2] the "prev" link; both are
--  indexes of other flag elements, with 0 meaning "none".
--  (see also demo\lnklst.exw)
    if q86first=0 then
        -- empty list: k becomes the one and only entry
        q86first = k
        x86[k+1] = 0
        x86[k+2] = 0
        q86last = k
        return
    end if
    if k<q86first then
        -- k lies before the current head: make it the new head
        x86[k+1] = q86first
        x86[k+2] = 0
        x86[q86first+2] = k
        q86first = k
        return
    end if
    -- find the last existing node that is not beyond k
    integer curr = q86first,
            nxt = x86[curr+1]
    while nxt!=0 and nxt<=k do
        curr = nxt
        nxt = x86[curr+1]
    end while
    x86[curr+1] = k         -- prior node now leads to k
    x86[k+1] = nxt          -- k leads to whatever followed (maybe 0)
    x86[k+2] = curr         -- k points back at the prior node
    if nxt=0 then
        q86last = k         -- k is the new tail
    else
        x86[nxt+2] = k      -- following node points back at k
    end if
end procedure
procedure quad(integer isFlag, integer offset)
-- Append an {isFlag,next,prev,offset} quad to x86 and hook it onto the end
-- of the q86first/q86last chain (isFlag being isBase/isAddr/isJmp).
-- The next/prev links are how pemit.e and scanforShortJmp/blurph locate
-- these entries during the final fixups.
-- Crashes (?9/0) if q86 is zero.
    if q86=0 then ?9/0 end if
    integer tail = q86last,
            flagIdx = length(x86)+1     -- where isFlag is about to land
    x86 &= {isFlag,0,tail,offset}       -- next=0, prev=old tail
    if tail=0 then
        q86first = flagIdx              -- first entry in the chain
    else
        x86[tail+1] = flagIdx           -- old tail's next
    end if
    q86last = flagIdx
end procedure
--DEV not (yet) in use (q86 is still==1, ilasm in pmain.e would also need fixing)
procedure quad2(integer isOther, integer i)
-- Append an {isOther,next,prev,i} quad to x86 and hook it onto the end of
-- the q86f2/q86l2 chain, where isOther is one of
-- isOpCode/isVar/isConstRef[Count]/isIL[a] and i is the matching
-- routine no, opcode, or var no.
-- The next/prev links are how pemit.e and scanforShortJmp/blurph locate
-- these entries during the final fixups (that is, when not binding/fast
-- "in situ" processing).
-- Crashes (?9/0) unless q86>1, and also for isOpCode under newEmit.
    if q86<=1 then ?9/0 end if
    -- under newEmit only isJmpG/isVar/isVno/isConstRef/isConstRefCount/
    --  isIL/isILa are acceptable here
    if newEmit and isOther=isOpCode then ?9/0 end if
    integer tail = q86l2,
            flagIdx = length(x86)+1     -- where isOther is about to land
    x86 &= {isOther,0,tail,i}           -- next=0, prev=old tail
    if tail=0 then
        q86f2 = flagIdx                 -- first entry in the chain
    else
        x86[tail+1] = flagIdx           -- old tail's next
    end if
    q86l2 = flagIdx
end procedure
--DEV 18/6/2013:
--integer opLnv, oplnlen, currRtn
-- opLnv: the line-number opcode that lineinfo() emits a call to when it is
--        non-zero (set in ilxlate()).
-- oplnlen: the code offset lineinfo() records in LineTab for the current
--        line; under DEBUG it is checked to equal length(x86).
integer opLnv, oplnlen
--q86 checked
--sequence call_op
-- call_op = {call_rel32,isOpCode,0,0,0}
-- thisDbg: when non-zero, lineinfo() maintains LineTab and may emit the
--        opLnv call; when zero it only updates lastline.
integer thisDbg
thisDbg = 0
--integer callopTraceFirst = 0
-- Both of the following are used by lineinfo() ahead of their definitions.
forward procedure emitHex5callG(integer opcode, integer lblidx=0)
forward procedure movRegImm32(integer reg, atom v)
--with trace
--global -- used by psched.e [DEV]
procedure lineinfo()
-- For a proper explanation of LineTab, see pdiag.e (this was written
-- in a relatively ad-hoc manner, sorry).
--
-- Invoked by the emitHexNnn routines whenever lastline!=emitline, ie just
-- before the first bytes for a new source line get appended to x86.
-- When thisDbg is non-zero it:
--  1) appends a negative count to LineTab for any lines which emitted no
--      code, followed by the start offset (oplnlen) of this line, and
--  2) if not binding and opLnv is set, emits mov eax,<emitline>
--      [mov ecx,<pfileno> for opLnp/opLnpt] and a call to opLnv, then
--      marks all registers as trashed (reginfo = 0).
-- In all cases lastline is left equal to emitline on exit.
integer firstline, skipline
-- if lastline!=emitline then -- now inlined
if thisDbg then
firstline = symtab[currRtn][S_1stl]
--?{skipline,emitline,ltline,lastline,LineTab,oplnlen}
-- skipline is emitline expressed relative to the routine's first line (1-based)
skipline = emitline-firstline+1
ltline += 1
if skipline!=ltline then
-- add negative count of lines which emitted no code:
if DEBUG then
-- (has been caused by not setting emitline correctly)
if ltline>skipline then ?9/0 end if -- major guff
end if
LineTab = append(LineTab,ltline-skipline)
ltline = skipline
end if
-- add start offset for this line:
if DEBUG then
if oplnlen!=length(x86) then ?9/0 end if
end if
-- LineTab = append(LineTab,length(x86))
LineTab = append(LineTab,oplnlen)
--if sched then shfixup() end if
if not bind then
if opLnv then -- set in ilxlate()
-- if sched then
-- if schidx then
---- schend()
-- ?9/0 -- do this asap in opLnt etc
-- end if
-- end if
--if callopTraceFirst then
-- leamov(edi,callopTraceFirst) -- lea edi,[src]/mov edi,src
-- emitHex5call(opTrace) -- call opXxxx
-- callopTraceFirst = 0
--end if
--if newEmit then
-- NOTE(review): lastline is set before emitting, presumably so that any
--  emitHexNnn routines reached via the calls below do not re-enter
--  lineinfo() - confirm against movRegImm32/emitHex5callG.
lastline = emitline
movRegImm32(eax,emitline) -- mov eax,imm32
-- x86 &= mov_eax_imm32
-- emitHexDword(emitline)
if opLnv=opLnp
or opLnv=opLnpt then
-->NO!! (fileno will always be 1 here!!!)
-- movRegImm32(ecx,fileno) -- mov ecx,imm32
movRegImm32(ecx,pfileno) -- mov ecx,imm32
-- x86 &= mov_ecx_imm32
-- emitHexDword(fileno)
end if
-- ?9/0
--?{opLnv,lastline,emitline}
emitHex5callG(opLnv) -- call :%pLnt/p/pt
opLntpcalled = 1
--else
-- x86 &= mov_eax_imm32
-- emitHexDword(emitline)
-- if q86>1 then
-- x86 &= call_rel32
-- quad2(isOpCode,opLnv)
-- else
-- call_op[5] = opLnv
-- x86 &= call_op
-- end if
--end if
-- all regs trashed
reginfo = 0
end if
end if
end if
-- (unconditional, so the callers' lastline!=emitline test now fails)
lastline = emitline
end procedure
--include psched.e
-- The emitHexNnn routines
-- =======================
-- There are about 20 of these, catering for different instruction lengths and
-- internal makeup. They are all pretty trivial.
--
-- First they check for lineinfo() needed (we note where we are in opLn/p/t/pt,
-- but delay updating tables etc in case we manage to completely optimise away
-- whole chunks of code, and hence we wait until we actually emit something).
--
-- Then they bolt a few bytes on the end of x86.
--
-- They exist mainly as a kind of self-documenting code, with some rudimentary
-- type-checking, and to group whole instructions together. As an added bonus,
-- they make it easy to calculate block sizes and offsets. I do not know them
-- all off by heart; instead I look them up at the point of use.
--
-- If you haven't seen #isginfo statements before, they are just a simple way
-- of performing compile-time type checking. No code whatsoever is emitted,
-- and apart from compiling cleanly/reporting an error, they do not affect
-- compilation in any other way. TIP: any probs, just comment them out - some
-- optimisations might get thwarted, but they are probably insignificant.
--DEV can h5 go with localtype info??
procedure emitHex1(integer op1)
-- Append a single byte (a one byte opcode, or a one byte literal
-- immediate) to x86, performing any pending lineinfo() first.
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 &= op1
end procedure
procedure emitHex2(integer op1, integer op2)
-- Append two literal bytes to x86, performing any pending lineinfo() first.
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 &= {op1,op2}
end procedure
--DEV temp replace of above... (do the same for emitHex2s)
procedure emitHexx2(integer op1, integer op2)
-- Append two literal bytes to x86, performing any pending lineinfo() first.
-- When X64 is set a #48 byte (REX.W prefix) is emitted ahead of them,
-- except when op1 is push_imm8 or mov_al_imm8.
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    if X64 then
        if op1!=push_imm8
        and op1!=mov_al_imm8 then
            emitHex1(#48)
        end if
    end if
    x86 &= {op1,op2}
end procedure
procedure emitHex2s(sequence op2)
-- Append a two byte opcode, supplied as a length-2 sequence, to x86,
-- performing any pending lineinfo() first.
-- Crashes (?9/0) if op2 is not exactly two elements long.
--/**/ #isginfo{op2,0b0100,MIN,MAX,integer,2} -- sequence of integer length 2
    if length(op2)!=2 then  -- compiler should optimise this away!
        ?9/0
    end if
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 &= op2
end procedure
procedure emitHex3(sequence op2, integer i8)
-- Append a two byte opcode (a length-2 sequence) followed by a one byte
-- immediate to x86, performing any pending lineinfo() first.
-- Crashes (?9/0) if op2 is not exactly two elements long.
--DEV 20/6/23 getting 6 (0b0110) vs 4 (0b0100) when compiling p64 with new MAXINT...
--/**/ #isginfo{op2,0b0100,MIN,MAX,integer,2} -- sequence of integer length 2
    if length(op2)!=2 then  -- compiler should optimise this away!
        ?9/0
    end if
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 &= op2
    x86 &= i8
end procedure
procedure emitHex3l(integer op1, integer op2, integer op3)
-- Append three literal bytes to x86, performing any pending lineinfo() first.
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 &= {op1,op2,op3}
end procedure
--procedure emitHex3s(sequence op3)
---- emit a three byte operation
----/**/ #isginfo{op3,0b0100,MIN,MAX,integer,3} -- sequence of integer length 3
-- if length(op3)!=3 then ?9/0 end if -- compiler should optimise this away!
----if not sched then
-- if lastline!=emitline then lineinfo() end if
----end if
-- x86 &= op3
--end procedure
procedure emitHex4l(integer op1, integer op2, integer op3, integer op4)
-- Append four literal bytes to x86, performing any pending lineinfo() first.
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 = append(x86,op1)
    x86 = append(x86,op2)
    x86 = append(x86,op3)
    x86 = append(x86,op4)
end procedure
procedure emitHex4sib(sequence op2, integer op3, integer op4)
-- Append an {opcode,xrm},sib,disp8 instruction (eg dec dword[ebx+edx*4-8])
-- to x86, performing any pending lineinfo() first: op2 is the length-2
-- {opcode,xrm} pair, op3 the sib byte and op4 the disp8.
-- Crashes (?9/0) if op2 is not exactly two elements long.
--/**/ #isginfo{op2,0b0100,MIN,MAX,integer,2} -- sequence of integer length 2
    if length(op2)!=2 then  -- compiler should optimise this away!
        ?9/0
    end if
--  if not sched then
    if emitline!=lastline then
        lineinfo()
    end if
--  end if
    x86 &= op2
    x86 &= {op3,op4}
end procedure
--q86 checked
-- Reusable {isVar,0,0,N} operand template; emitHex4v() overwrites the
-- last element before each use.
sequence v4 = {isVar,0,0,0}
procedure emitHex4v(integer N)
-- Append a four element {isVar,0,0,N} operand for symtab entry N to x86.
-- Crashes (?9/0) when q86>1, when symtab[N] is an S_TVar, or when its
-- S_State has the K_Fres bit set.
-- (Unlike the routines above, no lineinfo() check is made here.)
    if q86>1 then
        ?9/0
    end if
    if symtab[N][S_NTyp]=S_TVar then
        ?9/0
    end if
    if and_bits(symtab[N][S_State],K_Fres) then
        ?9/0
    end if
    v4[4] = N
    x86 &= v4           -- {isVar,0,0,N}
end procedure
--DEV migrate this to main loop, call opStat at loop end if we emitted an isOpCode
--procedure emitHex5(integer op1, integer flags, integer v)
---- emit a one byte opcode and a dword operand, ie (opcode,flags,0,0,value)
---- if emitON then
---- if not sched then
-- if lastline!=emitline then lineinfo() end if
---- end if
-- if collectstats then
-- if not bind and flags=isOpCode then
-- -- Example use of opStat.
-- -- Count times each opcode is actually executed.
-- -- NB does not count "inlined" opcodes, in fact must avoid any partially inlined,
-- -- ie with a jcc @f around it, where the offset is fixed, or anything, eg opMkSq,
-- -- implemented using a push <return addr>. Also note that anything like
-- -- NB: p2/3 obtained from [esp]-9/-14 on error
-- -- will be completely spannered by this intrusion, not that it matters much as
-- -- long as your test program does not actually crash.
-- if not find(v,{opTcFail,opMovsi,opMovti,
-- opSubse,opConcatN,opSubss,opSubse1,opAddiii,
-- opDivi2,opDiviii,opMuliii,opMovbi,opMkSq,opRepe1,
-- opReps,opRepe,opFrame}) then
-- x86 = append(x86,#60) -- pushad = o(1,4,0) -- 60 -- pushad
-- x86 = append(x86,#B8) -- mov_eax_imm32 = o(2,7,0) -- B8 imm32 -- mov eax,opcode
-- emitHexDword(v)
---- emitHex5(call_rel32,isOpCode,opStat)
-- x86 = append(x86,#E8) -- call_rel32 = o(3,5,0) -- E8 imm32 -- call rel32
-- x86 = append(x86,isOpCode)
-- x86 = append(x86,0)
-- x86 = append(x86,0)
-- x86 = append(x86,opStat)
-- x86 = append(x86,#61) -- popad = o(1,4,1) -- 61 -- popad
-- end if
-- end if
-- end if
-- x86 = append(x86,op1)
-- x86 = append(x86,flags)
-- x86 = append(x86,0)
-- x86 = append(x86,0)
-- x86 = append(x86,v)
-- if flags=isOpCode then
-- reginfo = 0
-- end if
---- end if
--end procedure
--DEV newEBP this should (probably) go...
--(q86 checked)
--sequence p5addr
-- p5addr = {push_imm32,isAddr,0,0,0} -- push <return addr>
function emitHex5addr()
-- push a return address for opTchk/MkSq/Subss/Subse/ConcatN/Repe/Reps
-- plants a zero offset and returns the location to backpatch later.
--if not sched then
if lastline!=emitline then lineinfo() end if -- (not currently needed)
--end if
if q86 then