hi
i want create a commande line tool for arm64 et intel x86-64
Xcode 16 macbook pro M3
my setting :
and
but my application compile only for arm64
who can explain to me ?
Selecting any option will automatically load the page
Post
Replies
Boosts
Views
Activity
hello,
i wrote a method with Neon for accelerate my code..
I don't have the expected result
only simples instructions and, or, shift... on 64bits
no gain speed. nada :-(
i dont understand... how programm this method for speed?
original code :
unsigned long long RXBitBoard::get_legal_moves(const unsigned long long p_discs, const unsigned long long o_discs) {
const unsigned long long inner_o_discs = o_discs & 0x7E7E7E7E7E7E7E7EULL;
/* direction W */
unsigned long long
flipped = (p_discs >> 1) & inner_o_discs;
flipped |= (flipped >> 1) & inner_o_discs;
unsigned long long adjacent_o_discs = inner_o_discs & (inner_o_discs >> 1);
flipped |= (flipped >> 2) & adjacent_o_discs;
flipped |= (flipped >> 2) & adjacent_o_discs;
unsigned long long legals = flipped >> 1;
// /* direction _E*/
// flipped = (p_discs << 1) & inner_o_discs;
// flipped |= (flipped << 1) & inner_o_discs;
//
// adjacent_o_discs = inner_o_discs & (inner_o_discs << 1);
//
// flipped |= (flipped << 2) & adjacent_o_discs;
// flipped |= (flipped << 2) & adjacent_o_discs;
//
// legals |= flipped << 1;
// trick
/* direction _E */
flipped = (p_discs << 1);
legals |= ((flipped + inner_o_discs) & ~flipped);
/* direction S */
flipped = (p_discs >> 8) & o_discs;
flipped |= (flipped >> 8) & o_discs;
adjacent_o_discs = o_discs & (o_discs >> 8);
flipped |= (flipped >> 16) & adjacent_o_discs;
flipped |= (flipped >> 16) & adjacent_o_discs;
legals |= flipped >> 8;
/* direction N */
flipped = (p_discs << 8) & o_discs;
flipped |= (flipped << 8) & o_discs;
adjacent_o_discs = o_discs & (o_discs << 8);
flipped |= (flipped << 16) & adjacent_o_discs;
flipped |= (flipped << 16) & adjacent_o_discs;
legals |= flipped << 8;
/* direction NE */
flipped = (p_discs >> 7) & inner_o_discs;
flipped |= (flipped >> 7) & inner_o_discs;
adjacent_o_discs = inner_o_discs & (inner_o_discs >> 7);
flipped |= (flipped >> 14) & adjacent_o_discs;
flipped |= (flipped >> 14) & adjacent_o_discs;
legals |= flipped >> 7;
/* direction SW */
flipped = (p_discs << 7) & inner_o_discs;
flipped |= (flipped << 7) & inner_o_discs;
adjacent_o_discs = inner_o_discs & (inner_o_discs << 7);
flipped |= (flipped << 14) & adjacent_o_discs;
flipped |= (flipped << 14) & adjacent_o_discs;
legals |= flipped << 7;
/* direction NW */
flipped = (p_discs >> 9) & inner_o_discs;
flipped |= (flipped >> 9) & inner_o_discs;
adjacent_o_discs = inner_o_discs & (inner_o_discs >> 9);
flipped |= (flipped >> 18) & adjacent_o_discs;
flipped |= (flipped >> 18) & adjacent_o_discs;
legals |= flipped >> 9;
/* direction SE */
flipped = (p_discs << 9) & inner_o_discs;
flipped |= (flipped << 9) & inner_o_discs;
adjacent_o_discs = inner_o_discs & (inner_o_discs << 9);
flipped |= (flipped << 18) & adjacent_o_discs;
flipped |= (flipped << 18) & adjacent_o_discs;
legals |= flipped << 9;
//Removes existing discs
legals &= ~(p_discs | o_discs);
return legals;
}
my neon code
const uint64x2_t pp_discs = vdupq_n_u64(p_discs);
const uint64x2_t oo_discs = vdupq_n_u64(o_discs);
const uint64x2_t inner_oo_discs = vdupq_n_u64(o_discs & 0x7E7E7E7E7E7E7E7EULL);
//horizontals directions -1, +1
static const int64x2_t shift_1 = {-1, 1};
static const int64x2_t shift_2 = {-2, 2};
uint64x2_t
flipped = vandq_u64(vshlq_u64(pp_discs, shift_1), inner_oo_discs);
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_1), inner_oo_discs));
uint64x2_t
adjacent_oo_discs = vandq_u64(inner_oo_discs, vshlq_u64(inner_oo_discs, shift_1));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_2), adjacent_oo_discs));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_2), adjacent_oo_discs));
uint64x2_t legals = vshlq_u64(flipped, shift_1);
//verticals directions -8 , +8
static const int64x2_t shift_8 = {-8, 8};
static const int64x2_t shift_16 = {-16, 16};
flipped = vandq_u64(vshlq_u64(pp_discs, shift_8), oo_discs);
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_8), oo_discs));
adjacent_oo_discs = vandq_u64(oo_discs, vshlq_u64(oo_discs, shift_8));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_16), adjacent_oo_discs));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_16), adjacent_oo_discs));
legals = vorrq_u64(legals, vshlq_u64(flipped, shift_8));
//diagonals directions -7 , +7
static const int64x2_t shift_7 = {-7, 7};
static const int64x2_t shift_14 = {-14, 14};
flipped = vandq_u64(vshlq_u64(pp_discs, shift_7), inner_oo_discs);
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_7), inner_oo_discs));
adjacent_oo_discs = vandq_u64(inner_oo_discs, vshlq_u64(inner_oo_discs, shift_7));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_14), adjacent_oo_discs));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_14), adjacent_oo_discs));
legals = vorrq_u64(legals, vshlq_u64(flipped, shift_7));
//diagonals directions -9 , +9
static const int64x2_t shift_9 = {-9, 9};
static const int64x2_t shift_18 = {-18, 18};
flipped = vandq_u64(vshlq_u64(pp_discs, shift_9), inner_oo_discs);
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_9), inner_oo_discs));
adjacent_oo_discs = vandq_u64(inner_oo_discs, vshlq_u64(inner_oo_discs, shift_9));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_18), adjacent_oo_discs));
flipped = vorrq_u64(flipped, vandq_u64(vshlq_u64(flipped, shift_18), adjacent_oo_discs));
legals = vorrq_u64(legals, vshlq_u64(flipped, shift_9));
return ((vgetq_lane_u64(legals, 0) | vgetq_lane_u64(legals, 1)) & ~(p_discs | o_discs));
}
i even wrote a interleave version
I can expect a speed gain, no?
hi
i dont know if i post in a good section. i have crash without message in console only error 34 311
what does this code mean?
erreur -311 smFHBlkDispErr : an error occurred during _sDisposePtr (suppression du bloc FHeader).
erreur -34 dskFulErr : full disk
or is this something else?
thank
I'm looking to do conditional programming with NEON. I try the __ARN_NEON tag. it doesn't work
can you help me?
Topic:
Programming Languages
SubTopic:
General
hello,
I would like to run benchmarks on p-cores vs e-cores,
I suspect a difference of more than 50%
is it possible to choose your processor (P or E) to run your program?
nice day
Hi,
is there an intrinsic instruction to shift a vector from another a shift vector?
thank
Topic:
Programming Languages
SubTopic:
General
hi
Question is in the title.. do we already know the length of the vectors?
best regards
hello,
i have this error :
error: accessing build database "/Users/.../Build/Intermediates.noindex/XCBuildData/build.db": disk I/O error
only if i use a custom paths for derived data...
how to fix it
Topic:
Developer Tools & Services
SubTopic:
Xcode
Do you know any software that indicates the frequency of the cores in real time?
the macbook pro adapts the frequency of the cores according to the load/temperature it seems to me
thank